diff --git a/.github/workflows/flow_amqp_consumer.yml b/.github/workflows/flow_amqp_consumer.yml index 6966922b8..a813b4a5a 100644 --- a/.github/workflows/flow_amqp_consumer.yml +++ b/.github/workflows/flow_amqp_consumer.yml @@ -5,8 +5,6 @@ on: types: [opened, edited, reopened] push: branches: - - development - - stable - issue_457_amqp_consumer paths-ignore: diff --git a/.github/workflows/flow_mqtt.yml b/.github/workflows/flow_mqtt.yml index 9789add50..097bd5694 100644 --- a/.github/workflows/flow_mqtt.yml +++ b/.github/workflows/flow_mqtt.yml @@ -5,7 +5,7 @@ on: types: [opened, edited, reopened] push: branches: - - development + - never paths-ignore: - '.github/**' - 'debian/changelog' diff --git a/debian/changelog b/debian/changelog index 00edfcaf0..e3654188c 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,5 +1,21 @@ -metpx-sr3 (3.00.52rc1) unstable; urgency=medium - +metpx-sr3 (3.00.52) unstable; urgency=medium + + * copernicus marine data store polling support ( #959 ) + * override baseUrl with message field ( for #951 ) + * os.kill for older python version instead of raise_signal #948 + * fix #955 error on cleanup when cache dir missing. + * gather/am fix to handle improperly parsed station names from getStation + * fix #953 PRECONDITION FAILED error on busy winnows + * above fix breaks/reopens #649 (cannot see queue size anymore.) + * AM renamer moved to gather for simplicity, also fixes there. + * fix: sr3 declare would fail when no admin.conf configured. + * misc. fixes with AMQP message acknowledgement. + * fix #934 found references to v2 documentation. Modernized. + * fix #942, #943 processing of rename and retrievePath headers. + * fix #940 display crash in sr3 overview + * fix #920 add _isRetry for duplicate suppression support in new retry + logic. + * more #918 bug fixes for AM renaming now matches Sundew perfectly. * nodupe_fileAgeX (X=Min or Max) name change to fileAgeX replacing inflight. 
* fix #907 declare exchange in admin.conf wasn't working. * fix #912 enable v2 style retry processing with new --retry_refilter flag. diff --git a/docs/source/Contribution/AMQPprimer.rst b/docs/source/Contribution/AMQPprimer.rst index 42c0aaee8..64b36dd93 100644 --- a/docs/source/Contribution/AMQPprimer.rst +++ b/docs/source/Contribution/AMQPprimer.rst @@ -84,7 +84,7 @@ Topic-based Exchanges ~~~~~~~~~~~~~~~~~~~~~ Topic-based exchanges are used exclusively. AMQP supports many other types of exchanges, -but sr_post have the topic sent in order to support server side filtering by using topic +but sr3_post have the topic sent in order to support server side filtering by using topic based filtering. At AMQP 1.0, topic-based exchanges (indeed all exchanges, are no longer defined.) Server-side filtering allows for much fewer topic hierarchies to be used, and for much more efficient subsciptions. diff --git a/docs/source/Contribution/Design.rst b/docs/source/Contribution/Design.rst index 2da881746..90e8a5ec5 100644 --- a/docs/source/Contribution/Design.rst +++ b/docs/source/Contribution/Design.rst @@ -125,13 +125,13 @@ as is provided by many free brokers, such as rabbitmq, often referred to as 0.8, 0.9 brokers are also likely to inter-operate well. In AMQP, many different actors can define communication parameters. To create a clearer -security model, sarracenia constrains that model: sr_post clients are not expected to declare +security model, sarracenia constrains that model: sr3_post clients are not expected to declare Exchanges. All clients are expected to use existing exchanges which have been declared by broker administrators. Client permissions are limited to creating queues for their own use, using agreed upon naming schemes. Queue for client: qc_.???? Topic-based exchanges are used exclusively. 
AMQP supports many other types of exchanges, -but sr_post have the topic sent in order to support server side filtering by using topic +but sr3_post have the topic sent in order to support server side filtering by using topic based filtering. The topics mirror the path of the files being announced, allowing straight-forward server-side filtering, to be augmented by client-side filtering on message reception. @@ -492,6 +492,12 @@ interaction with many layers, including the application. Disks are either dedic or a cluster file system is to be used. The application is expected to deal with those two cases. +most of the cluster management is taken care of by the sr3_tools project: + + https://github.com/MetPX/sr3_tools + +A review of that project to manage deployments regardless of topology, would be helpful. + Some document short-hand: Bunny @@ -610,14 +616,14 @@ Broker clustering is considered mature technology, and therefore relatively trus DD: Data Dissemination Configuration (AKA: Data Mart) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The sr deployment configuration is more of an end-point configuration. Each node is expected to -have a complete copy of all the data downloaded by all the nodes. Giving a unified view makes +The sr3 deployment configuration is more of an end-point configuration. Each node is expected to +have a complete copy of all the data downloaded by all the nodes. Giving a unified view makes it much more compatible with a variety of access methods, such as a file browser (over http, -or sftp) rather than being limited to AMQP notification messages. This is the type of view presented by +or sftp) rather than being limited to AMQP notification messages. This is the type of view presented by dd.weather.gc.ca. Given this view, all files must be fully reassembled on receipt, prior to announcing downstream -availability. files may have been fragmented for transfer across intervening pumps. +availability. 
Files may have been fragmented for transfer across intervening pumps. There are multiple options for achieving this end user visible effect, each with tradeoffs. In all cases, there is a load balancer in front of the nodes which distributes incoming @@ -625,10 +631,10 @@ connection requests to a node for processing. - multiple server nodes. Each standalone. - - sr - load balancer, just re-directs to a sr node? + - sr3 - load balancer, just re-directs to a sr3 node? dd1,dd2, - broker on sr node has connection thereafter. + broker on sr3 node has connection thereafter. Independent DD diff --git a/docs/source/Contribution/Development.rst b/docs/source/Contribution/Development.rst index fe29e9c5f..66f3cfda8 100644 --- a/docs/source/Contribution/Development.rst +++ b/docs/source/Contribution/Development.rst @@ -123,6 +123,11 @@ Planned by 2022/04/11: * launchpad has recipes to produce metpx-sr3 packages from the stable branch. + * The *MetPX Daily* repository is a snapshot of the development branch. + + * The *MetPX Pre-Release* repository should receive versions ending in rcX (release candidate) + + * The *MetPX* repository should only contain stable releases that have graduated from the rcX series. sr_insects @@ -633,7 +638,7 @@ Install a minimal localhost broker and configure rabbitmq test users:: sudo wget http://localhost:15672/cli/rabbitmqadmin sudo chmod 755 rabbitmqadmin - sr --users declare + sr3 --users declare .. Note:: @@ -761,7 +766,7 @@ and defines some fixed test clients that will be used during self-tests:: Starting flow_post on: /home/peter/sarra_devdocroot, saving pid in .flowpostpid Starting up all components (sr start)... done. - OK: sr start was successful + OK: sr3 start was successful Overall PASSED 4/4 checks passed! 
blacklab% @@ -809,7 +814,7 @@ Then check show it went with flow_check.sh:: test 4 success: max shovel (1022) and subscriber t_f30 (1022) should have about the same number of items test 5 success: count of truncated headers (1022) and subscribed messages (1022) should have about the same number of items test 6 success: count of downloads by subscribe t_f30 (1022) and messages received (1022) should be about the same - test 7 success: downloads by subscribe t_f30 (1022) and files posted by sr_watch (1022) should be about the same + test 7 success: downloads by subscribe t_f30 (1022) and files posted by watch (1022) should be about the same test 8 success: posted by watch(1022) and sent by sr_sender (1022) should be about the same test 9 success: 1022 of 1022: files sent with identical content to those downloaded by subscribe test 10 success: 1022 of 1022: poll test1_f62 and subscribe q_f71 run together. Should have equal results. @@ -830,7 +835,7 @@ thorough, it is good to know the flows are working. Note that the *fclean* subscriber looks at files in and keeps files around long enough for them to go through all the other tests. It does this by waiting a reasonable amount of time (45 seconds, the last time checked.) then it compares the file -that have been posted by sr_watch to the files created by downloading from it. As the *sample now* count proceeds, +that have been posted by watch to the files created by downloading from it. As the *sample now* count proceeds, it prints "OK" if the files downloaded are identical to the ones posted by sr_watch. The addition of fclean and the corresponding cfclean for the cflow_test, are broken. The default setup which uses *fclean* and *cfclean* ensures that only a few minutes worth of disk space is used at a given time, and allows for much longer tests. @@ -872,9 +877,9 @@ between each run of the flow test:: 2018-02-10 14:17:34,353 [INFO] info: report option not implemented, ignored. 
2018-02-10 09:17:34,837 [INFO] sr_poll f62 cleanup 2018-02-10 09:17:34,845 [INFO] deleting exchange xs_tsource_poll (tsource@localhost) - 2018-02-10 09:17:35,115 [INFO] sr_post shim_f63 cleanup + 2018-02-10 09:17:35,115 [INFO] sr3_post shim_f63 cleanup 2018-02-10 09:17:35,122 [INFO] deleting exchange xs_tsource_shim (tsource@localhost) - 2018-02-10 09:17:35,394 [INFO] sr_post test2_f61 cleanup + 2018-02-10 09:17:35,394 [INFO] sr3_post test2_f61 cleanup 2018-02-10 09:17:35,402 [INFO] deleting exchange xs_tsource_post (tsource@localhost) 2018-02-10 09:17:35,659 [INFO] sr_report tsarra_f20 cleanup 2018-02-10 09:17:35,659 [INFO] AMQP broker(localhost) user(tfeed) vhost(/) @@ -936,7 +941,7 @@ between each run of the flow test:: 2018-02-10 09:17:39,927 [INFO] deleting queue q_tsource.sr_subscribe.u_sftp_f60.81353341.03950190 (tsource@localhost) 2018-02-10 09:17:40,196 [WARNING] option url deprecated please use post_base_url 2018-02-10 09:17:40,196 [WARNING] use post_broker to set broker - 2018-02-10 09:17:40,197 [INFO] sr_watch f40 cleanup + 2018-02-10 09:17:40,197 [INFO] watch f40 cleanup 2018-02-10 09:17:40,207 [INFO] deleting exchange xs_tsource (tsource@localhost) 2018-02-10 09:17:40,471 [INFO] sr_winnow t00_f10 cleanup 2018-02-10 09:17:40,471 [INFO] AMQP broker(localhost) user(tfeed) vhost(/) @@ -1038,7 +1043,7 @@ While it is running one can run flow_check.sh at any time:: test  4 success: max shovel (100008) and subscriber t_f30 (99953) should have about the same number of items test  5 success: count of truncated headers (100008) and subscribed messages (100008) should have about the same number of items test  6 success: count of downloads by subscribe t_f30 (99953) and messages received (100008) should be about the same - test  7 success: same downloads by subscribe t_f30 (199906) and files posted (add+remove) by sr_watch (199620) should be about the same + test  7 success: same downloads by subscribe t_f30 (199906) and files posted (add+remove) by watch (199620) 
should be about the same test  8 success: posted by watch(199620) and subscribed cp_f60 (99966) should be about half as many test  9 success: posted by watch(199620) and sent by sr_sender (199549) should be about the same test 10 success: 0 messages received that we don't know what happenned. @@ -1087,14 +1092,14 @@ Sometimes flow tests (especially for large numbers) get stuck because of problem To recover from this state without discarding the results of a long test, do:: ^C to interrupt the flow_check.sh 100000 - blacklab% sr stop + blacklab% sr3 stop blacklab% cd ~/.cache/sarra blacklab% ls */*/*retry* shovel/pclean_f90/sr_shovel_pclean_f90_0001.retry shovel/pclean_f92/sr_shovel_pclean_f92_0001.retry subscribe/t_f30/sr_subscribe_t_f30_0002.retry.new shovel/pclean_f91/sr_shovel_pclean_f91_0001.retry shovel/pclean_f92/sr_shovel_pclean_f92_0001.retry.state shovel/pclean_f91/sr_shovel_pclean_f91_0001.retry.state subscribe/q_f71/sr_subscribe_q_f71_0004.retry.new blacklab% rm */*/*retry* - blacklab% sr start + blacklab% sr3 start blacklab% blacklab% ./flow_check.sh 100000 Sufficient! 
@@ -1124,9 +1129,9 @@ To recover from this state without discarding the results of a long test, do:: test 4 success: sr_subscribe (98068) should have the same number of items as sarra (98075) | watch routing | - test 5 success: sr_watch (397354) should be 4 times subscribe t_f30 (98068) + test 5 success: watch (397354) should be 4 times subscribe t_f30 (98068) test 6 success: sr_sender (392737) should have about the same number - of items as sr_watch (397354) + of items as watch (397354) test 7 success: sr_subscribe u_sftp_f60 (361172) should have the same number of items as sr_sender (392737) test 8 success: sr_subscribe cp_f61 (361172) should have the same @@ -1137,11 +1142,11 @@ To recover from this state without discarding the results of a long test, do:: test 10 success: sr_subscribe q_f71 (195406) should have about the same number of items as sr_poll test1_f62(195408) | flow_post routing | - test 11 success: sr_post test2_f61 (193541) should have half the same + test 11 success: sr3_post test2_f61 (193541) should have half the same number of items of sr_sender(196368) test 12 success: sr_subscribe ftp_f70 (193541) should have about the - same number of items as sr_post test2_f61(193541) - test 13 success: sr_post test2_f61 (193541) should have about the same + same number of items as sr3_post test2_f61(193541) + test 13 success: sr3_post test2_f61 (193541) should have about the same number of items as shim_f63 195055 | py infos routing | test 14 success: sr_shovel pclean_f90 (97019) should have the same @@ -1467,6 +1472,14 @@ occurs that is identified as the released version. PyPi ~~~~ +Pypi does not distinguish between older and newer python releases. There is only one package +version for all supported versions. When uploading from a new OS, the versions in use on the +OS are inferred to be the minimum, and so installation on older operating systems may be blocked +by generated dependencies on overly modern versions. 
+ +So when uploading to pypi, always do so from the oldest operating system where it needs to work. +upward compatibility is more likely than downward. + Pypi Credentials go in ~/.pypirc. Sample Content:: [pypi] @@ -1477,7 +1490,7 @@ Assuming pypi upload credentials are in place, uploading a new release used to b python3 setup.py bdist_wheel upload -on older systems, or on newer ones:: +on older systems, or on (python >= 3.7) newer ones:: python3 -m build --no-isolation twine upload dist/metpx_sarracenia-2.22.6-py3-none-any.whl dist/metpx_sarracenia-2.22.6.tar.gz diff --git a/docs/source/Contribution/Philosophy/AboutTime.ipynb b/docs/source/Contribution/Philosophy/AboutTime.ipynb index 6bdb53a6b..45eaab6ea 100644 --- a/docs/source/Contribution/Philosophy/AboutTime.ipynb +++ b/docs/source/Contribution/Philosophy/AboutTime.ipynb @@ -1,5 +1,14 @@ { "cells": [ + { + "cell_type": "markdown", + "id": "cc172731", + "metadata": {}, + "source": [ + "# STATUS: WIP\n", + "work in progress. Not worth reading yet. more notes than anything else." + ] + }, { "cell_type": "markdown", "id": "6395a549", @@ -81,14 +90,14 @@ "\n", "So once you have good information about the clouds that exist, and the rate they are moving at, and whether they are speeding up, or building up, or dissipating... then a forecaster is trained in physics, and applies the rules of physics to understand how clouds will move in the future. Essentially it uses the spreadsheet to calculate what the sky should look like one small step later in time, and then another step, and another, until you have arrived as far in time as needed for the forecast product.\n", "\n", - "At this point, the result is a spreadsheet. The forecaster then can use the spreadsheet and write descriptions of weather for their client, or have an automated process do that, or produce simlated ¨satellite\" imagery to show where the clouds till in the future.\n", + "At this point, the result is a spreadsheet. 
The forecaster then can use the spreadsheet and write descriptions of weather for their client, or have an automated process do that, or produce simulated \"satellite\" imagery to show where the clouds will be in the future.\n", "\n", "Restating things:\n", "\n", "* step 1: Acquisiion: gather a time series of data for kinds of data.\n", "* step 2: Assimilation: put all the different data into a pile of spreadsheets.\n", "* step 3: Numerical Model: hit calculate on the spreadsheet for the number of timestesp you need.\n", - "* step 4: Services: translating the spreadsheets back into things people can understand.\n", + "* step 4: Services: translating the spreadsheets back into things people can understand (maps, text, and simulated future images).\n", "\n", "\n", "Looking at all these steps, it is obvious that they are extremely tedious for a human to do, and things that a computer, in principle should be great at. Rather than having a human look at web sites, and extract data,\n", @@ -107,10 +116,976 @@ "\n", "Well the quality of the result will vary with the quality of the input. To make a spreadsheet, the forecaster decides how big an volume of space to cover with each cell. The bigger the volume covered by each cell, the more you are taking different data points and averaging them to get one value for the the cell, so the \"fuzzier\" the pictures that result. \n", "\n", - "How big are the spreadsheets? 30 years ago, the \"high resolution\" spreadsheet covering North America had each cell was 150 km. on a side, At that time, computers were not big enough to cover the whole world. Today, the high resolution models are around 10km on a side (100 sq. km.) As the area of the earth is around 500 million sq. km. that means that the grid today should be about 5 million cells. per level, models typically have 25 levels representing different heights of air in the atmosphere, so that means 125 million cells to calculate.\n", + "How big are the spreadsheets? 
30 years ago, the \"high resolution\" spreadsheet covering North America had each cell was 150 km. on a side, At that time, computers were not big enough to cover the whole world. Today, the high resolution models are around 10km on a side (100 sq. km.) As the area of the earth is around 500 million sq. km. that means that the grid today should be about 5 million cells. per level, models typically have 25 levels representing different heights of air in the atmosphere, so that means 125 million cells to calculate.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "9881d7f4", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/peter/.local/lib/python3.10/site-packages/plotly/express/_core.py:1753: UserWarning:\n", + "\n", + "Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n", + "\n", + "/home/peter/.local/lib/python3.10/site-packages/plotly/express/_core.py:1754: UserWarning:\n", + "\n", + "Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n", + "\n" + ] + }, + { + "data": { + "application/vnd.plotly.v1+json": { + "config": { + "plotlyServerURL": "https://plot.ly" + }, + "data": [ + { + "alignmentgroup": "True", + "base": [ + "12:05:00", + "12:45:00", + "13:05:00", + "13:45:00" + ], + "hovertemplate": "Start=%{base}
Finish=%{x}
Task=%{y}", + "legendgroup": "", + "marker": { + "color": "#636efa", + "pattern": { + "shape": "" + } + }, + "name": "", + "offsetgroup": "", + "orientation": "h", + "showlegend": false, + "textposition": "auto", + "type": "bar", + "x": [ + 2400000, + 1200000, + 2400000, + 1500000 + ], + "xaxis": "x", + "y": [ + "Acquisition", + "Assimilation", + "Model", + "Services" + ], + "yaxis": "y" + } + ], + "layout": { + "barmode": "overlay", + "legend": { + "tracegroupgap": 0 + }, + "margin": { + "t": 60 + }, + "template": { + "data": { + "bar": [ + { + "error_x": { + "color": "#2a3f5f" + }, + "error_y": { + "color": "#2a3f5f" + }, + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "bar" + } + ], + "barpolar": [ + { + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "barpolar" + } + ], + "carpet": [ + { + "aaxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "baxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "type": "carpet" + } + ], + "choropleth": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "choropleth" + } + ], + "contour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "contour" + } + ], + 
"contourcarpet": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "contourcarpet" + } + ], + "heatmap": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmap" + } + ], + "heatmapgl": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmapgl" + } + ], + "histogram": [ + { + "marker": { + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "histogram" + } + ], + "histogram2d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2d" + } + ], + "histogram2dcontour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + 
"#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2dcontour" + } + ], + "mesh3d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "mesh3d" + } + ], + "parcoords": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "parcoords" + } + ], + "pie": [ + { + "automargin": true, + "type": "pie" + } + ], + "scatter": [ + { + "fillpattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + }, + "type": "scatter" + } + ], + "scatter3d": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter3d" + } + ], + "scattercarpet": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattercarpet" + } + ], + "scattergeo": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergeo" + } + ], + "scattergl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergl" + } + ], + "scattermapbox": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattermapbox" + } + ], + "scatterpolar": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolar" + } + ], + "scatterpolargl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolargl" + } + ], + "scatterternary": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterternary" + } + ], + "surface": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + 
}, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "surface" + } + ], + "table": [ + { + "cells": { + "fill": { + "color": "#EBF0F8" + }, + "line": { + "color": "white" + } + }, + "header": { + "fill": { + "color": "#C8D4E3" + }, + "line": { + "color": "white" + } + }, + "type": "table" + } + ] + }, + "layout": { + "annotationdefaults": { + "arrowcolor": "#2a3f5f", + "arrowhead": 0, + "arrowwidth": 1 + }, + "autotypenumbers": "strict", + "coloraxis": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "colorscale": { + "diverging": [ + [ + 0, + "#8e0152" + ], + [ + 0.1, + "#c51b7d" + ], + [ + 0.2, + "#de77ae" + ], + [ + 0.3, + "#f1b6da" + ], + [ + 0.4, + "#fde0ef" + ], + [ + 0.5, + "#f7f7f7" + ], + [ + 0.6, + "#e6f5d0" + ], + [ + 0.7, + "#b8e186" + ], + [ + 0.8, + "#7fbc41" + ], + [ + 0.9, + "#4d9221" + ], + [ + 1, + "#276419" + ] + ], + "sequential": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "sequentialminus": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + 
"#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ] + }, + "colorway": [ + "#636efa", + "#EF553B", + "#00cc96", + "#ab63fa", + "#FFA15A", + "#19d3f3", + "#FF6692", + "#B6E880", + "#FF97FF", + "#FECB52" + ], + "font": { + "color": "#2a3f5f" + }, + "geo": { + "bgcolor": "white", + "lakecolor": "white", + "landcolor": "#E5ECF6", + "showlakes": true, + "showland": true, + "subunitcolor": "white" + }, + "hoverlabel": { + "align": "left" + }, + "hovermode": "closest", + "mapbox": { + "style": "light" + }, + "paper_bgcolor": "white", + "plot_bgcolor": "#E5ECF6", + "polar": { + "angularaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "radialaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "scene": { + "xaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "yaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "zaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + } + }, + "shapedefaults": { + "line": { + "color": "#2a3f5f" + } + }, + "ternary": { + "aaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "baxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "caxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "title": { + "x": 0.05 + }, + "xaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + }, + "yaxis": { + "automargin": true, + "gridcolor": "white", 
+ "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + } + } + }, + "xaxis": { + "anchor": "y", + "domain": [ + 0, + 1 + ], + "type": "date" + }, + "yaxis": { + "anchor": "x", + "autorange": "reversed", + "domain": [ + 0, + 1 + ], + "title": { + "text": "Task" + } + } + } + }, + "text/html": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import plotly.express as px\n", + "import pandas as pd\n", "\n", - "Each cell has information about different phenomena, called variables, say 25 of them. so to get a full picture, one needs 125 million x 25 variables x 8 bytes per variable = 6.2 billion variables in 25 gbytes of memory. To calculate the what the values of those variables are at the next unit in time would involve \n", - "\n" + "df = pd.DataFrame([\n", + " dict(Task=\"Acquisition\", Start='12:05:00', Finish='12:45:00'),\n", + " dict(Task=\"Assimilation\", Start='12:45:00', Finish='13:05:00'),\n", + " dict(Task=\"Model\", Start='13:05:00', Finish='13:45:00'),\n", + " dict(Task=\"Services\", Start='13:45:00', Finish='14:10:00')\n", + " \n", + "])\n", + "\n", + "fig = px.timeline(df, x_start=\"Start\", x_end=\"Finish\", y=\"Task\")\n", + "fig.update_yaxes(autorange=\"reversed\") # otherwise tasks are listed from the bottom up\n", + "fig.show()" + ] + }, + { + "cell_type": "markdown", + "id": "3fdd3540", + "metadata": {}, + "source": [ + "Each cell has information about different phenomena, called variables, say 25 of them. So to get a full picture, one needs 125 million x 25 variables x 8 bytes per variable = 6.2 billion variables in about 50 gbytes of memory. Calculating what the values of those variables are at the next unit in time involves many calculations using that data, and the prediction's accuracy involves some tradeoffs:\n", + "\n", + "The simpler the model, the simpler the math, the less calculation time to get a result. The more complicated the model & math, the better the result, but the longer it takes to do." 
] }, { @@ -118,8 +1093,3137 @@ "id": "64fb191a", "metadata": {}, "source": [ - "* https://en.wikipedia.org/wiki/History_of_numerical_weather_prediction#cite_note-RFE-43 note on Weather model from 1989\n" + "* https://en.wikipedia.org/wiki/History_of_numerical_weather_prediction#cite_note-RFE-43 note on Weather model from 1989\n", + "\n", + "* \n", + "https://devops.com/dont-build-microservices-pursue-loose-coupling/\n" ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "958fed92", + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import matplotlib.dates as mdates\n", + "from datetime import datetime\n", + "\n", + "try:\n", + " # Try to fetch a list of Matplotlib releases and their dates\n", + " # from https://api.github.com/repos/matplotlib/matplotlib/releases\n", + " import urllib.request\n", + " import json\n", + "\n", + " url = 'https://api.github.com/repos/matplotlib/matplotlib/releases'\n", + " url += '?per_page=100'\n", + " data = json.loads(urllib.request.urlopen(url, timeout=1).read().decode())\n", + "\n", + " dates = []\n", + " names = []\n", + " for item in data:\n", + " if 'rc' not in item['tag_name'] and 'b' not in item['tag_name']:\n", + " dates.append(item['published_at'].split(\"T\")[0])\n", + " names.append(item['tag_name'])\n", + " # Convert date strings (e.g. 2014-10-18) to datetime\n", + " dates = [datetime.strptime(d, \"%Y-%m-%d\") for d in dates]\n", + "\n", + "except Exception:\n", + " # In case the above fails, e.g. 
because of missing internet connection\n", + " # use the following lists as fallback.\n", + " names = ['v2.2.4', 'v3.0.3', 'v3.0.2', 'v3.0.1', 'v3.0.0', 'v2.2.3',\n", + " 'v2.2.2', 'v2.2.1', 'v2.2.0', 'v2.1.2', 'v2.1.1', 'v2.1.0',\n", + " 'v2.0.2', 'v2.0.1', 'v2.0.0', 'v1.5.3', 'v1.5.2', 'v1.5.1',\n", + " 'v1.5.0', 'v1.4.3', 'v1.4.2', 'v1.4.1', 'v1.4.0']\n", + "\n", + " dates = ['2019-02-26', '2019-02-26', '2018-11-10', '2018-11-10',\n", + " '2018-09-18', '2018-08-10', '2018-03-17', '2018-03-16',\n", + " '2018-03-06', '2018-01-18', '2017-12-10', '2017-10-07',\n", + " '2017-05-10', '2017-05-02', '2017-01-17', '2016-09-09',\n", + " '2016-07-03', '2016-01-10', '2015-10-29', '2015-02-16',\n", + " '2014-10-26', '2014-10-18', '2014-08-26']\n", + "\n", + " # Convert date strings (e.g. 2014-10-18) to datetime\n", + " dates = [datetime.strptime(d, \"%Y-%m-%d\") for d in dates]\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "69ddd6c2", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Choose some nice levels\n", + "levels = np.tile([-5, 5, -3, 3, -1, 1],\n", + " int(np.ceil(len(dates)/6)))[:len(dates)]\n", + "\n", + "# Create figure and plot a stem plot with the date\n", + "fig, ax = plt.subplots(figsize=(8.8, 4), layout=\"constrained\")\n", + "ax.set(title=\"Matplotlib release dates\")\n", + "\n", + "ax.vlines(dates, 0, levels, color=\"tab:red\") # The vertical stems.\n", + "ax.plot(dates, np.zeros_like(dates), \"-o\",\n", + " color=\"k\", markerfacecolor=\"w\") # Baseline and markers on it.\n", + "\n", + "# annotate lines\n", + "for d, l, r in zip(dates, levels, names):\n", + " ax.annotate(r, xy=(d, l),\n", + " xytext=(-3, np.sign(l)*3), textcoords=\"offset points\",\n", + " horizontalalignment=\"right\",\n", + " verticalalignment=\"bottom\" if l > 0 else \"top\")\n", + "\n", + "# format x-axis with 4-month intervals\n", + "ax.xaxis.set_major_locator(mdates.MonthLocator(interval=4))\n", + "ax.xaxis.set_major_formatter(mdates.DateFormatter(\"%b %Y\"))\n", + "plt.setp(ax.get_xticklabels(), rotation=30, ha=\"right\")\n", + "\n", + "# remove y-axis and spines\n", + "ax.yaxis.set_visible(False)\n", + "ax.spines[[\"left\", \"top\", \"right\"]].set_visible(False)\n", + "\n", + "ax.margins(y=0.1)\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "d38bd681", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib\n", + "import matplotlib.pyplot as plt\n", + "import datetime as dt\n", + "\n", + "# from https://www.datacamp.com/tutorial/how-to-make-gantt-chart-in-python-matplotlib\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "6cc70c33", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " task team start end completion_frac\n", + "0 A R&D 2022-10-20 2022-10-31 1.00\n", + "1 B 
Accounting 2022-10-24 2022-10-28 1.00\n", + "2 C Sales 2022-10-26 2022-10-31 1.00\n", + "3 D Sales 2022-10-31 2022-11-08 1.00\n", + "4 E IT 2022-11-03 2022-11-09 1.00\n", + "5 F R&D 2022-11-07 2022-11-18 0.95\n", + "6 G IT 2022-11-10 2022-11-17 0.70\n", + "7 H Sales 2022-11-14 2022-11-22 0.35\n", + "8 I Accounting 2022-11-18 2022-11-23 0.10\n", + "9 J Accounting 2022-11-23 2022-12-01 0.00\n", + "10 K Sales 2022-11-28 2022-12-05 0.00\n", + "11 L IT 2022-11-30 2022-12-05 0.00\n" + ] + } + ], + "source": [ + "df = pd.DataFrame({'task': ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L'],\n", + " 'team': ['R&D', 'Accounting', 'Sales', 'Sales', 'IT', 'R&D', 'IT', 'Sales', 'Accounting', 'Accounting', 'Sales', 'IT'],\n", + " 'start': pd.to_datetime(['20 Oct 2022', '24 Oct 2022', '26 Oct 2022', '31 Oct 2022', '3 Nov 2022', '7 Nov 2022', '10 Nov 2022', '14 Nov 2022', '18 Nov 2022', '23 Nov 2022', '28 Nov 2022', '30 Nov 2022']),\n", + " 'end': pd.to_datetime(['31 Oct 2022', '28 Oct 2022', '31 Oct 2022', '8 Nov 2022', '9 Nov 2022', '18 Nov 2022', '17 Nov 2022', '22 Nov 2022', '23 Nov 2022', '1 Dec 2022', '5 Dec 2022', '5 Dec 2022']),\n", + " 'completion_frac': [1, 1, 1, 1, 1, 0.95, 0.7, 0.35, 0.1, 0, 0, 0]})\n", + "print(df)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "2bea7be6", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " task team start end completion_frac days_to_start \\\n", + "0 A R&D 2022-10-20 2022-10-31 1.00 0 \n", + "1 B Accounting 2022-10-24 2022-10-28 1.00 4 \n", + "2 C Sales 2022-10-26 2022-10-31 1.00 6 \n", + "3 D Sales 2022-10-31 2022-11-08 1.00 11 \n", + "4 E IT 2022-11-03 2022-11-09 1.00 14 \n", + "5 F R&D 2022-11-07 2022-11-18 0.95 18 \n", + "6 G IT 2022-11-10 2022-11-17 0.70 21 \n", + "7 H Sales 2022-11-14 2022-11-22 0.35 25 \n", + "8 I Accounting 2022-11-18 2022-11-23 0.10 29 \n", + "9 J Accounting 2022-11-23 2022-12-01 0.00 34 \n", + "10 K Sales 2022-11-28 2022-12-05 
0.00 39 \n", + "11 L IT 2022-11-30 2022-12-05 0.00 41 \n", + "\n", + " days_to_end task_duration completion_days \n", + "0 11 12 12.00 \n", + "1 8 5 5.00 \n", + "2 11 6 6.00 \n", + "3 19 9 9.00 \n", + "4 20 7 7.00 \n", + "5 29 12 11.40 \n", + "6 28 8 5.60 \n", + "7 33 9 3.15 \n", + "8 34 6 0.60 \n", + "9 42 9 0.00 \n", + "10 46 8 0.00 \n", + "11 46 6 0.00 \n" + ] + } + ], + "source": [ + "df['days_to_start'] = (df['start'] - df['start'].min()).dt.days\n", + "df['days_to_end'] = (df['end'] - df['start'].min()).dt.days\n", + "df['task_duration'] = df['days_to_end'] - df['days_to_start'] + 1 # to include also the end date\n", + "df['completion_days'] = df['completion_frac'] * df['task_duration']\n", + "print(df)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "de5e70ba", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plt.barh(y=df['task'], width=df['task_duration'], left=df['days_to_start'])\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "bf7f4335", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.plotly.v1+json": { + "config": { + "plotlyServerURL": "https://plot.ly" + }, + "data": [ + { + "alignmentgroup": "True", + "base": [ + "2009-01-01", + "2009-03-05", + "2009-02-20" + ], + "hovertemplate": "Start=%{base}
Finish=%{x}
Task=%{y}", + "legendgroup": "", + "marker": { + "color": "#636efa", + "pattern": { + "shape": "" + } + }, + "name": "", + "offsetgroup": "", + "orientation": "h", + "showlegend": false, + "textposition": "auto", + "type": "bar", + "x": [ + 5011200000, + 3542400000, + 8553600000 + ], + "xaxis": "x", + "y": [ + "Job A", + "Job B", + "Job C" + ], + "yaxis": "y" + } + ], + "layout": { + "barmode": "overlay", + "legend": { + "tracegroupgap": 0 + }, + "margin": { + "t": 60 + }, + "template": { + "data": { + "bar": [ + { + "error_x": { + "color": "#2a3f5f" + }, + "error_y": { + "color": "#2a3f5f" + }, + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "bar" + } + ], + "barpolar": [ + { + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "barpolar" + } + ], + "carpet": [ + { + "aaxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "baxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "type": "carpet" + } + ], + "choropleth": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "choropleth" + } + ], + "contour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "contour" + } + ], + "contourcarpet": [ + { + 
"colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "contourcarpet" + } + ], + "heatmap": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmap" + } + ], + "heatmapgl": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmapgl" + } + ], + "histogram": [ + { + "marker": { + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "histogram" + } + ], + "histogram2d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2d" + } + ], + "histogram2dcontour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 
0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2dcontour" + } + ], + "mesh3d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "mesh3d" + } + ], + "parcoords": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "parcoords" + } + ], + "pie": [ + { + "automargin": true, + "type": "pie" + } + ], + "scatter": [ + { + "fillpattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + }, + "type": "scatter" + } + ], + "scatter3d": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter3d" + } + ], + "scattercarpet": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattercarpet" + } + ], + "scattergeo": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergeo" + } + ], + "scattergl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergl" + } + ], + "scattermapbox": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattermapbox" + } + ], + "scatterpolar": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolar" + } + ], + "scatterpolargl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolargl" + } + ], + "scatterternary": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterternary" + } + ], + "surface": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ 
+ [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "surface" + } + ], + "table": [ + { + "cells": { + "fill": { + "color": "#EBF0F8" + }, + "line": { + "color": "white" + } + }, + "header": { + "fill": { + "color": "#C8D4E3" + }, + "line": { + "color": "white" + } + }, + "type": "table" + } + ] + }, + "layout": { + "annotationdefaults": { + "arrowcolor": "#2a3f5f", + "arrowhead": 0, + "arrowwidth": 1 + }, + "autotypenumbers": "strict", + "coloraxis": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "colorscale": { + "diverging": [ + [ + 0, + "#8e0152" + ], + [ + 0.1, + "#c51b7d" + ], + [ + 0.2, + "#de77ae" + ], + [ + 0.3, + "#f1b6da" + ], + [ + 0.4, + "#fde0ef" + ], + [ + 0.5, + "#f7f7f7" + ], + [ + 0.6, + "#e6f5d0" + ], + [ + 0.7, + "#b8e186" + ], + [ + 0.8, + "#7fbc41" + ], + [ + 0.9, + "#4d9221" + ], + [ + 1, + "#276419" + ] + ], + "sequential": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "sequentialminus": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 
0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ] + }, + "colorway": [ + "#636efa", + "#EF553B", + "#00cc96", + "#ab63fa", + "#FFA15A", + "#19d3f3", + "#FF6692", + "#B6E880", + "#FF97FF", + "#FECB52" + ], + "font": { + "color": "#2a3f5f" + }, + "geo": { + "bgcolor": "white", + "lakecolor": "white", + "landcolor": "#E5ECF6", + "showlakes": true, + "showland": true, + "subunitcolor": "white" + }, + "hoverlabel": { + "align": "left" + }, + "hovermode": "closest", + "mapbox": { + "style": "light" + }, + "paper_bgcolor": "white", + "plot_bgcolor": "#E5ECF6", + "polar": { + "angularaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "radialaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "scene": { + "xaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "yaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "zaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + } + }, + "shapedefaults": { + "line": { + "color": "#2a3f5f" + } + }, + "ternary": { + "aaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "baxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "caxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "title": { + "x": 0.05 + }, + "xaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + }, + "yaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": 
"white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + } + } + }, + "xaxis": { + "anchor": "y", + "domain": [ + 0, + 1 + ], + "type": "date" + }, + "yaxis": { + "anchor": "x", + "autorange": "reversed", + "domain": [ + 0, + 1 + ], + "title": { + "text": "Task" + } + } + } + }, + "text/html": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import plotly.express as px\n", + "import pandas as pd\n", + "\n", + "df = pd.DataFrame([\n", + " dict(Task=\"Job A\", Start='2009-01-01', Finish='2009-02-28'),\n", + " dict(Task=\"Job B\", Start='2009-03-05', Finish='2009-04-15'),\n", + " dict(Task=\"Job C\", Start='2009-02-20', Finish='2009-05-30')\n", + "])\n", + "\n", + "fig = px.timeline(df, x_start=\"Start\", x_end=\"Finish\", y=\"Task\")\n", + "fig.update_yaxes(autorange=\"reversed\") # otherwise tasks are listed from the bottom up\n", + "fig.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "id": "663b0fcd", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Task Start_time Duration End_time \\\n", + "0 Acquisition 2023-09-08 12:05:00 0 days 00:40:00 2023-09-08 12:45:00 \n", + "1 GOES images T-12 2023-09-08 12:05:00 0 days 00:05:00 2023-09-08 12:10:00 \n", + "2 GOES images T-11 2023-09-08 12:10:00 0 days 00:05:00 2023-09-08 12:15:00 \n", + "3 GOES images T-10 2023-09-08 12:15:00 0 days 00:05:00 2023-09-08 12:20:00 \n", + "4 GOES images T-09 2023-09-08 12:20:00 0 days 00:05:00 2023-09-08 12:25:00 \n", + "5 Assimilation 2023-09-08 12:45:00 0 days 00:30:00 2023-09-08 13:15:00 \n", + "6 Model 2023-09-08 13:15:00 0 days 00:45:00 2023-09-08 14:00:00 \n", + "\n", + " Start_with Start_after \n", + "0 NaN NaN \n", + "1 Acquisition NaN \n", + "2 NaN GOES images T-12 \n", + "3 NaN GOES images T-11 \n", + "4 NaN GOES images T-10 \n", + "5 NaN Acquisition \n", + "6 NaN Assimilation \n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/peter/.local/lib/python3.10/site-packages/_plotly_utils/basevalidators.py:105: FutureWarning:\n", + "\n", + "The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. 
To retain the old behavior, call `np.array` on the result\n", + "\n" + ] + }, + { + "data": { + "application/vnd.plotly.v1+json": { + "config": { + "plotlyServerURL": "https://plot.ly" + }, + "data": [ + { + "alignmentgroup": "True", + "base": [ + "2023-09-08T12:05:00", + "2023-09-08T12:05:00", + "2023-09-08T12:10:00", + "2023-09-08T12:15:00", + "2023-09-08T12:20:00", + "2023-09-08T12:45:00", + "2023-09-08T13:15:00" + ], + "hovertemplate": "Start_time=%{base}
End_time=%{x}
Task=%{y}", + "legendgroup": "", + "marker": { + "color": "#636efa", + "pattern": { + "shape": "" + } + }, + "name": "", + "offsetgroup": "", + "orientation": "h", + "showlegend": false, + "textposition": "auto", + "type": "bar", + "x": [ + 2400000, + 300000, + 300000, + 300000, + 300000, + 1800000, + 2700000 + ], + "xaxis": "x", + "y": [ + "Acquisition", + "GOES images T-12", + "GOES images T-11", + "GOES images T-10", + "GOES images T-09", + "Assimilation", + "Model" + ], + "yaxis": "y" + } + ], + "layout": { + "barmode": "overlay", + "legend": { + "tracegroupgap": 0 + }, + "margin": { + "t": 60 + }, + "template": { + "data": { + "bar": [ + { + "error_x": { + "color": "#2a3f5f" + }, + "error_y": { + "color": "#2a3f5f" + }, + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "bar" + } + ], + "barpolar": [ + { + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "barpolar" + } + ], + "carpet": [ + { + "aaxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "baxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "type": "carpet" + } + ], + "choropleth": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "choropleth" + } + ], + "contour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 
0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "contour" + } + ], + "contourcarpet": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "contourcarpet" + } + ], + "heatmap": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmap" + } + ], + "heatmapgl": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmapgl" + } + ], + "histogram": [ + { + "marker": { + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "histogram" + } + ], + "histogram2d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2d" + } + ], + "histogram2dcontour": [ + { + "colorbar": { + 
"outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2dcontour" + } + ], + "mesh3d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "mesh3d" + } + ], + "parcoords": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "parcoords" + } + ], + "pie": [ + { + "automargin": true, + "type": "pie" + } + ], + "scatter": [ + { + "fillpattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + }, + "type": "scatter" + } + ], + "scatter3d": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter3d" + } + ], + "scattercarpet": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattercarpet" + } + ], + "scattergeo": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergeo" + } + ], + "scattergl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergl" + } + ], + "scattermapbox": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattermapbox" + } + ], + "scatterpolar": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolar" + } + ], + "scatterpolargl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolargl" + } + ], + "scatterternary": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + 
}, + "type": "scatterternary" + } + ], + "surface": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "surface" + } + ], + "table": [ + { + "cells": { + "fill": { + "color": "#EBF0F8" + }, + "line": { + "color": "white" + } + }, + "header": { + "fill": { + "color": "#C8D4E3" + }, + "line": { + "color": "white" + } + }, + "type": "table" + } + ] + }, + "layout": { + "annotationdefaults": { + "arrowcolor": "#2a3f5f", + "arrowhead": 0, + "arrowwidth": 1 + }, + "autotypenumbers": "strict", + "coloraxis": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "colorscale": { + "diverging": [ + [ + 0, + "#8e0152" + ], + [ + 0.1, + "#c51b7d" + ], + [ + 0.2, + "#de77ae" + ], + [ + 0.3, + "#f1b6da" + ], + [ + 0.4, + "#fde0ef" + ], + [ + 0.5, + "#f7f7f7" + ], + [ + 0.6, + "#e6f5d0" + ], + [ + 0.7, + "#b8e186" + ], + [ + 0.8, + "#7fbc41" + ], + [ + 0.9, + "#4d9221" + ], + [ + 1, + "#276419" + ] + ], + "sequential": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "sequentialminus": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 
0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ] + }, + "colorway": [ + "#636efa", + "#EF553B", + "#00cc96", + "#ab63fa", + "#FFA15A", + "#19d3f3", + "#FF6692", + "#B6E880", + "#FF97FF", + "#FECB52" + ], + "font": { + "color": "#2a3f5f" + }, + "geo": { + "bgcolor": "white", + "lakecolor": "white", + "landcolor": "#E5ECF6", + "showlakes": true, + "showland": true, + "subunitcolor": "white" + }, + "hoverlabel": { + "align": "left" + }, + "hovermode": "closest", + "mapbox": { + "style": "light" + }, + "paper_bgcolor": "white", + "plot_bgcolor": "#E5ECF6", + "polar": { + "angularaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "radialaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "scene": { + "xaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "yaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "zaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + } + }, + "shapedefaults": { + "line": { + "color": "#2a3f5f" + } + }, + "ternary": { + "aaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "baxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "caxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "title": { + "x": 0.05 + }, + "xaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + 
"zerolinecolor": "white", + "zerolinewidth": 2 + }, + "yaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + } + } + }, + "xaxis": { + "anchor": "y", + "domain": [ + 0, + 1 + ], + "type": "date" + }, + "yaxis": { + "anchor": "x", + "autorange": "reversed", + "domain": [ + 0, + 1 + ], + "title": { + "text": "Task" + } + } + } + }, + "text/html": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import plotly.express as px\n", + "import copy\n", + "import pandas as pd\n", + "import datetime as dt\n", + "\n", + "today=dt.datetime.now()\n", + "first_task_start=dt.time(hour=12,minute=5)\n", + "time_of_start=today.combine(today,first_task_start)\n", + "\n", + "df = pd.DataFrame([\n", + " dict(Task=\"Acquisition\", Start_time=time_of_start, Duration='00:40:00', End_time=today),\n", + " dict(Task=\"GOES images T-12\", Start_with='Acquisition', Duration='00:05:00', End_time=today),\n", + " dict(Task=\"GOES images T-11\", Start_after='GOES images T-12', Duration='00:05:00', End_time=today),\n", + " dict(Task=\"GOES images T-10\", Start_after='GOES images T-11', Duration='00:05:00', End_time=today),\n", + " dict(Task=\"GOES images T-09\", Start_after='GOES images T-10', Duration='00:05:00', End_time=today),\n", + " dict(Task=\"Assimilation\", Start_after='Acquisition', Duration='00:30:00'),\n", + " dict(Task=\"Model\", Start_after='Assimilation', Duration='00:45:00')\n", + "])\n", + "\n", + "df['Duration']=pd.to_timedelta(df['Duration'])\n", + "\n", + "for ti in df.index:\n", + " if pd.isna(df['Start_time'][ti]):\n", + " for predecessor in df.index:\n", + " if df['Task'][predecessor] == df['Start_after'][ti]:\n", + " df.loc[ti, 'Start_time'] = df['End_time'][predecessor]\n", + " elif df['Task'][predecessor] == df['Start_with'][ti]:\n", + " df.loc[ti, 'Start_time'] = df['Start_time'][predecessor]\n", + " else:\n", + " pd.to_datetime(df['Start_time'][ti])\n", + "\n", + " df.loc[ti, 'End_time'] = df['Start_time'][ti]+df['Duration'][ti]\n", + "\n", + "\n", + "print(df)\n", + "\n", + "\n", + "fig = px.timeline(df, x_start=\"Start_time\", x_end=\"End_time\", y=\"Task\")\n", + "fig.update_yaxes(autorange=\"reversed\") # otherwise tasks are listed from the bottom up\n", + "fig.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "id": "f8713f24", + "metadata": {}, + 
"outputs": [ + { + "data": { + "application/vnd.plotly.v1+json": { + "config": { + "plotlyServerURL": "https://plot.ly" + }, + "data": [ + { + "hovertemplate": "x=%{x}
y=%{y}", + "legendgroup": "", + "line": { + "color": "#636efa", + "dash": "solid" + }, + "marker": { + "symbol": "circle" + }, + "mode": "lines", + "name": "", + "orientation": "v", + "showlegend": false, + "type": "scatter", + "x": [ + 1, + 2, + 3, + 4, + 5 + ], + "xaxis": "x", + "y": [ + 2, + 4, + 1, + 6, + 3 + ], + "yaxis": "y" + } + ], + "layout": { + "legend": { + "tracegroupgap": 0 + }, + "margin": { + "t": 60 + }, + "template": { + "data": { + "bar": [ + { + "error_x": { + "color": "#2a3f5f" + }, + "error_y": { + "color": "#2a3f5f" + }, + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "bar" + } + ], + "barpolar": [ + { + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "barpolar" + } + ], + "carpet": [ + { + "aaxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "baxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "type": "carpet" + } + ], + "choropleth": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "choropleth" + } + ], + "contour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "contour" + } + ], + "contourcarpet": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": 
"contourcarpet" + } + ], + "heatmap": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmap" + } + ], + "heatmapgl": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmapgl" + } + ], + "histogram": [ + { + "marker": { + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "histogram" + } + ], + "histogram2d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2d" + } + ], + "histogram2dcontour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + 
"#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2dcontour" + } + ], + "mesh3d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "mesh3d" + } + ], + "parcoords": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "parcoords" + } + ], + "pie": [ + { + "automargin": true, + "type": "pie" + } + ], + "scatter": [ + { + "fillpattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + }, + "type": "scatter" + } + ], + "scatter3d": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter3d" + } + ], + "scattercarpet": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattercarpet" + } + ], + "scattergeo": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergeo" + } + ], + "scattergl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergl" + } + ], + "scattermapbox": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattermapbox" + } + ], + "scatterpolar": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolar" + } + ], + "scatterpolargl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolargl" + } + ], + "scatterternary": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterternary" + } + ], + "surface": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + 
], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "surface" + } + ], + "table": [ + { + "cells": { + "fill": { + "color": "#EBF0F8" + }, + "line": { + "color": "white" + } + }, + "header": { + "fill": { + "color": "#C8D4E3" + }, + "line": { + "color": "white" + } + }, + "type": "table" + } + ] + }, + "layout": { + "annotationdefaults": { + "arrowcolor": "#2a3f5f", + "arrowhead": 0, + "arrowwidth": 1 + }, + "autotypenumbers": "strict", + "coloraxis": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "colorscale": { + "diverging": [ + [ + 0, + "#8e0152" + ], + [ + 0.1, + "#c51b7d" + ], + [ + 0.2, + "#de77ae" + ], + [ + 0.3, + "#f1b6da" + ], + [ + 0.4, + "#fde0ef" + ], + [ + 0.5, + "#f7f7f7" + ], + [ + 0.6, + "#e6f5d0" + ], + [ + 0.7, + "#b8e186" + ], + [ + 0.8, + "#7fbc41" + ], + [ + 0.9, + "#4d9221" + ], + [ + 1, + "#276419" + ] + ], + "sequential": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "sequentialminus": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ] + }, + 
"colorway": [ + "#636efa", + "#EF553B", + "#00cc96", + "#ab63fa", + "#FFA15A", + "#19d3f3", + "#FF6692", + "#B6E880", + "#FF97FF", + "#FECB52" + ], + "font": { + "color": "#2a3f5f" + }, + "geo": { + "bgcolor": "white", + "lakecolor": "white", + "landcolor": "#E5ECF6", + "showlakes": true, + "showland": true, + "subunitcolor": "white" + }, + "hoverlabel": { + "align": "left" + }, + "hovermode": "closest", + "mapbox": { + "style": "light" + }, + "paper_bgcolor": "white", + "plot_bgcolor": "#E5ECF6", + "polar": { + "angularaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "radialaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "scene": { + "xaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "yaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "zaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + } + }, + "shapedefaults": { + "line": { + "color": "#2a3f5f" + } + }, + "ternary": { + "aaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "baxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "caxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "title": { + "x": 0.05 + }, + "xaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + }, + "yaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": 
"white", + "zerolinewidth": 2 + } + } + }, + "xaxis": { + "anchor": "y", + "domain": [ + 0, + 1 + ], + "title": { + "text": "x" + } + }, + "yaxis": { + "anchor": "x", + "domain": [ + 0, + 1 + ], + "title": { + "text": "y" + } + } + } + }, + "text/html": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import plotly.express as px\n", + "\n", + "# Create a simple line plot\n", + "data = {'x': [1, 2, 3, 4, 5], 'y': [2, 4, 1, 6, 3]}\n", + "fig = px.line(data, x='x', y='y')\n", + "\n", + "# Display the plot\n", + "fig.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "be2f421c", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "05119583", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { @@ -138,7 +4242,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.4" + "version": "3.10.12" } }, "nbformat": 4, diff --git a/docs/source/Contribution/Philosophy/Sarracenia_Algoritmic_Designs.ipynb b/docs/source/Contribution/Philosophy/Sarracenia_Algoritmic_Designs.ipynb index 4b8ab02b4..7f1108470 100644 --- a/docs/source/Contribution/Philosophy/Sarracenia_Algoritmic_Designs.ipynb +++ b/docs/source/Contribution/Philosophy/Sarracenia_Algoritmic_Designs.ipynb @@ -753,7 +753,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.6" + "version": "3.10.12" } }, "nbformat": 4, diff --git a/docs/source/Contribution/on_part_assembly.rst b/docs/source/Contribution/on_part_assembly.rst index eca599608..9d083b483 100644 --- a/docs/source/Contribution/on_part_assembly.rst +++ b/docs/source/Contribution/on_part_assembly.rst @@ -6,7 +6,7 @@ File Re-assembling Components ---------- -**sr_watch:** You can use sr_watch to watch a directory for incoming partition files (.Part) from sr_subscribe or sr_sender, both have the ability to send a file in partitions. 
In the config file for sr_watch the important parameters to include are: +**sr_watch:** You can use sr3_watch to watch a directory for incoming partition files (.Part) from sr_subscribe or sr_sender, both have the ability to send a file in partitions. In the config file for sr3_watch the important parameters to include are: - path - on_part /usr/lib/python3/dist-packages/sarra/plugins/part_file_assemble.py @@ -45,7 +45,7 @@ After being triggered by a downloaded part file: Testing ------- -Create an sr_watch config file according to the template above +Create an sr3_watch config file according to the template above Start the process by typing the following command: ```sr_watch foreground path/to/config_file.cfg``` Then create a subcriber config file and include ```inplace off``` so the file will be downloaded in parts diff --git a/docs/source/Contribution/v03.rst b/docs/source/Contribution/v03.rst index 20858811a..ff4ab0e62 100644 --- a/docs/source/Contribution/v03.rst +++ b/docs/source/Contribution/v03.rst @@ -487,7 +487,7 @@ With shovel and winnow replaced by new implementations, it passes the dynamic flow test, including the Retry module ported to sr3, and a number of v2 modules used as-is. -Completed an initial version of the sr_post component now (in sr3: flowcb.gather.file.File) +Completed an initial version of the sr3_post component now (in sr3: flowcb.gather.file.File) Now working on sr_poll, which will take a while because it involve refactoring: sr_file, sr_http, sr_ftp, sr_sftp into the transfer module @@ -705,7 +705,7 @@ Probably need to be settled before having anyone else dive in. likely equivalent to async, and multi-gather. * think about API by sub-classing flow... and having it auto-integrate - with sr entry point... hmm... likely look at this when updating + with sr3 entry point... hmm... likely look at this when updating Programmer's Guide. * more worklists? rename failed -> retry or deferred. 
Add a new failed @@ -726,7 +726,7 @@ FIXME are things left to the side that need to be seen to. * **RELEASE BLOCKER** hairy. #403 - sr_watch does not batch things. It just dumps an entire tree. + watch does not batch things. It just dumps an entire tree. This will need to be re-wored before release into an iterator style approach. so if you start in a tree with a million files, it will scan the entire million and present them as a single in memory worklist. This will have performance @@ -737,7 +737,7 @@ FIXME are things left to the side that need to be seen to. impact and delay to producing the first file is still there, but at least returns one batch at a time. -* **RELEASE BLOCKER** logs of sr_poll and sr_watch tend to get humungous way too quickly. #389 +* **RELEASE BLOCKER** logs of sr_poll and watch tend to get humungous way too quickly. #389 * try out jsonfile for building notification messages to post. can build json incrementally, #402 so you do not need to delete the _deleteOnPost elements (can just skip over them) @@ -784,7 +784,7 @@ Name the package metpx-sarra3 and have the python class directory be sarra3 (ins retry files have different formats? validate. ) So one can copy configurations from old to new and run both versions in parallel. The central entry point would be sr3 (rather than sr), and to avoid confusion the other entry points (sr_subscribe etc...) would be omitted -so that v2 code would work unchanged. Might require some tweaks to have the sr classes +so that v2 code would work unchanged. Might require some tweaks to have the sr3 classes ignore instances from the other versions. This is similar to python2 to python3 transition. Allows deployment of sr3 without having @@ -889,11 +889,11 @@ Features * properties/options for classes are now hierarchical, so can set debug to specific classes within app. -* sr ability to select multiple components and configurations to operate on. 
+* sr3 ability to select multiple components and configurations to operate on. -* sr list examples is now used to display examples separate from the installed ones. +* sr3 list examples is now used to display examples separate from the installed ones. -* sr show is now used to display the parsed configuration. +* sr3 show is now used to display the parsed configuration. * notification messages are acknowledged more quickly, should help with throughput. diff --git a/docs/source/Explanation/CommandLineGuide.rst b/docs/source/Explanation/CommandLineGuide.rst index a1fdde4e4..9133b1ca5 100644 --- a/docs/source/Explanation/CommandLineGuide.rst +++ b/docs/source/Explanation/CommandLineGuide.rst @@ -1138,7 +1138,7 @@ The pollUrl option specify what is needed to connect to the remote server The *pollUrl* should be set with the minimum required information... **sr_poll** uses *pollUrl* setting not only when polling, but also -in the sr_post notification messages produced. +in the sr3_post notification messages produced. For example, the user can set : diff --git a/docs/source/Explanation/DeploymentConsiderations.rst b/docs/source/Explanation/DeploymentConsiderations.rst index a5560178d..d5c4a0739 100644 --- a/docs/source/Explanation/DeploymentConsiderations.rst +++ b/docs/source/Explanation/DeploymentConsiderations.rst @@ -261,7 +261,7 @@ Subscribers Post, Notice, Notification, Advertisement, Announcement - These are AMQP messages build by sr_post, sr_poll, or sr_watch to let users + These are AMQP messages built by sr3_post, sr3 poll, or sr3 watch to let users know that a particular file is ready. The format of these AMQP messages is described by the `sr_post(7) <../Reference/sr3.1.html#post>`_ manual page. All of these words are used interchangeably. 
Advertisements at each step preserve the diff --git a/docs/source/Explanation/DetectFileReady.rst b/docs/source/Explanation/DetectFileReady.rst index a17e3eec3..6c710bd38 100644 --- a/docs/source/Explanation/DetectFileReady.rst +++ b/docs/source/Explanation/DetectFileReady.rst @@ -4,7 +4,7 @@ File Detection Strategies ========================= -The fundamental job of sr_watch is to notice when files are available to be transferred. +The fundamental job of watch is to notice when files are available to be transferred. The appropriate strategy varies according to: - the **number of files in the tree** to be monitored, @@ -16,7 +16,7 @@ watch where one is posting for an *sr_sarra* component, then use of the *delete* option will keep the number of files in directory at any one point small and minimize the time to notice new ones. In such optimal conditions, noticing files in a hundredth of a second is reasonable to expect. Any method -will work well for such trees, but the sr_watch defaults (inotify) are usually +will work well for such trees, but the watch defaults (inotify) are usually the lowest overhead. When the tree gets large, the decision can change based on a number of factors, @@ -43,7 +43,7 @@ File Detection Strategy Table |library | might be posted. | - most efficient. | | | - works on any size file tree. | - more complicated to setup. | |(LD_PRELOAD) | - very multi-threaded. | - use where python3 not available. | -| | - I/O by writer (better localized) | - no sr_watch needed. | +| | - I/O by writer (better localized) | - no watch needed. | |(in C) | - very multi-threaded (user processes)| - no plugins. | | | | | +-------------+---------------------------------------+--------------------------------------+ @@ -60,7 +60,7 @@ File Detection Strategy Table +-------------+---------------------------------------+--------------------------------------+ |sr_cpost |works like watch if sleep > 0 | - where python3 is hard to get. | | | | - where speed is critical. 
| -|(in C) | - faster than sr_watch | - where plugins not needed. | +|(in C) | - faster than watch | - where plugins not needed. | | | - uses less memory than sr_watch. | - same issues with tree size | | | - practical with a bit bigger trees. | as sr_watch, just a little later. | | | | (see following methods) | @@ -76,7 +76,7 @@ File Detection Strategy Table | | - Large trees mean long startup. |works great with 10000 files | |(in Python) | - each node in a cluster may need |only a few seconds startup. | | | to run an instance | | -| | - each sr_watch single threaded. |too slow for millions of files. | +| | - each watch single threaded. |too slow for millions of files. | +-------------+---------------------------------------+--------------------------------------+ |sr_watch with| | | |reject |Use Linux convention to *hide* files. |Sending to systems that | @@ -108,10 +108,10 @@ File Detection Strategy Table +-------------+---------------------------------------+--------------------------------------+ -sr_watch is sr_post with the added *sleep* option that will cause it to loop +sr_watch is sr3_post with the added *sleep* option that will cause it to loop over directories given as arguments. sr3_cpost is a C version that functions identically, except it is faster and uses much less memory, at the cost of the -loss of plugin support. With sr_watch (and sr3_cpost) The default method of +loss of plugin support. With a watch (and sr3_cpost), the default method of noticing changes in directories uses OS specific mechanisms (on Linux: INOTIFY) to recognize changes without having to scan the entire directory tree manually. Once primed, file changes are noticed instantaneously, but requires an @@ -135,8 +135,8 @@ be used if timeliness is a concern. In supercomputing clusters, distributed files systems are used, and the OS optimized methods for recognizing file modifications (INOTIFY on Linux) do not -cross node boundaries. 
To use sr_watch with the default strategy on a -directory in a compute cluster, one usually must have an sr_watch process +cross node boundaries. To use watch with the default strategy on a +directory in a compute cluster, one usually must have a watch process running on every node. If that is undesirable, then one can deploy it on a single node with *force_polling* but the timing will be constrained by the directory size. @@ -157,7 +157,7 @@ need to be posted to call it:: where *shimpost.conf* is an sr_cpost configuration file in the ~/.config/sarra/post/ directory. An sr_cpost configuration file is the same -as an sr_post one, except that plugins are not supported. With the shim +as an sr3_post one, except that plugins are not supported. With the shim library in place, whenever a file is written, the *accept/reject* clauses of the shimpost.conf file are consulted, and if accepted, the file is posted just as it would be by sr_watch. @@ -186,7 +186,7 @@ the entire file, such as *N* (SHA-512 of the file name only), but then one loses the ability to differentiate between versions of the file. note :: - should think about using N on the sr_watch, and having multi-instance shovels + should think about using N on the watch, and having multi-instance shovels recalculate checksums so that part becomes easily parallellizable. Should be straightforward, but not yet explored as a result of use of shim library. FIXME. @@ -194,7 +194,7 @@ A last consideration is that in many cases, other processes are writing files to directories being monitored by sr_watch. Failing to properly set file completion protocols is a common source of intermittent and difficult to diagnose file transfer issues. For reliable file transfers, it is critical -that both the writer and sr_watch agree on how to represent a file that +that both the writer and watch agree on how to represent a file that isn't complete. 
diff --git a/docs/source/Explanation/Glossary.rst b/docs/source/Explanation/Glossary.rst index 21e395f1c..25fd97a78 100644 --- a/docs/source/Explanation/Glossary.rst +++ b/docs/source/Explanation/Glossary.rst @@ -90,7 +90,7 @@ that they should advertise to send to. *FIXME* undefined so far. Post, Notice, Notification, Advertisement, Announcement ------------------------------------------------------- -These are AMQP messages build by sr_post, sr_poll, or sr_watch to let users +These are AMQP messages built by sr3_post, sr3 poll, or sr3 watch to let users know that a particular file is ready. The format of these AMQP messages is described by the `sr_post(7) <../Reference/sr_post.7.html>`_ manual page. All of these words are used interchangeably. Advertisements at each step preserve the diff --git a/docs/source/Explanation/History/HPC_Mirroring_Use_Case.rst b/docs/source/Explanation/History/HPC_Mirroring_Use_Case.rst index 9202d589d..35b37e761 100644 --- a/docs/source/Explanation/History/HPC_Mirroring_Use_Case.rst +++ b/docs/source/Explanation/History/HPC_Mirroring_Use_Case.rst @@ -155,11 +155,11 @@ run against the file system database at as high a rhythm as can be sustained (ar query) combined with sr_poll to announce the files modified (and thus eligible for copying). This is completely non-portable, but was expected to be much faster than file tree traversal. -Over the winter 2016/2017, both of these methods were implemented. The INOTIFY-based sr_watch was the +Over the winter 2016/2017, both of these methods were implemented. The INOTIFY-based sr3_watch was the fastest method (instantaneous), but the daemons were having stability and memory consumption problems, and they also took too long to startup (requires an initial tree traversal, which takes the same time as the rsync). 
While slower (taking longer to notice a file was modified), the GPFS policy had *acceptable* -performance and was far more reliable than the parallel sr_watch method, and by the spring, with deployment +performance and was far more reliable than the parallel sr3_watch method, and by the spring, with deployment expected for early July 2017, the GPFS policy approach was selected. As the migration progressed, the file systems grew in that they had more files in the trees, and the GPFS-policy diff --git a/docs/source/Explanation/History/mesh_gts.rst b/docs/source/Explanation/History/mesh_gts.rst index d27ec59a8..8798cf152 100644 --- a/docs/source/Explanation/History/mesh_gts.rst +++ b/docs/source/Explanation/History/mesh_gts.rst @@ -48,10 +48,10 @@ data using standardized open methods, with a straightforward mapping from the metadata. As the proposed implementation uses existing standards, the WMO does not need to define any additional ones, and interoperability and access for other players in the broader -society should be straightforward. The sr_post protocol, and +society should be straightforward. The sr3_post protocol, and a number of existing implementations of it, are a great fit. -While it is believed that sr_post protocol has great potential +While it is believed that sr3_post protocol has great potential to improve WMO data exchange, it will take a few years to adopt it, and prior to adoption, there needs to be agreement on the file tree content. Today, the next step would be to find some partner countries with which @@ -759,7 +759,7 @@ work or cost as they are inherent to the technologies proposed. 
Programmability/Interoperability -------------------------------- -A new application to process sr_post messages can be re-implemented if there +A new application to process sr3_post messages can be re-implemented if there is a desire to do so, as in addition to full documentation, source code for a handful of `implementations <../Overview.html#implementations>`_ (Python, C, Go, node.js), is readily publically available. diff --git a/docs/source/Explanation/History/talks/Sarra201702/script.txt b/docs/source/Explanation/History/talks/Sarra201702/script.txt index ac00e1244..34196f529 100644 --- a/docs/source/Explanation/History/talks/Sarra201702/script.txt +++ b/docs/source/Explanation/History/talks/Sarra201702/script.txt @@ -290,7 +290,7 @@ Application Still Alpha? * reports have not been fully implemented, so they might not work yet (defer?) * plugin API requires a little more work (nothing incompatible so far, but worried.) - * sr_watch and sr_post change usage over last few months. + * sr3_watch and sr3_post change usage over last few months. * Static trees * Proper interswitch routing. diff --git a/docs/source/Explanation/History/talks/SarraExplained/script.txt b/docs/source/Explanation/History/talks/SarraExplained/script.txt index e5e756218..117a05277 100644 --- a/docs/source/Explanation/History/talks/SarraExplained/script.txt +++ b/docs/source/Explanation/History/talks/SarraExplained/script.txt @@ -8,7 +8,7 @@ of internals. Let´s say someone wants to inject an image into the data pumping network. 
-We call people who inject data ´sources´, and a source runs the sr_post command, +We call people who inject data ´sources´, and a source runs the sr3_post command, which creates a message like the following and sends it to the AMQP broker running on a data pump: diff --git a/docs/source/Explanation/History/talks/SarraIntro/configs_A2B/configs4broadcast.txt b/docs/source/Explanation/History/talks/SarraIntro/configs_A2B/configs4broadcast.txt index 8bfd5bb01..25d47ac2c 100644 --- a/docs/source/Explanation/History/talks/SarraIntro/configs_A2B/configs4broadcast.txt +++ b/docs/source/Explanation/History/talks/SarraIntro/configs_A2B/configs4broadcast.txt @@ -13,7 +13,7 @@ It uses http throughout, which is simpler because there is less authentication. # #On Montreal's Server: -echo sr_watch -u http://pumpAadmin@AliceServer//home/CWAO -b amqps://Montreal@pumpWashington/ -to GISC_cache -dr /var/www/???? FIXME +echo sr3_watch -u http://pumpAadmin@AliceServer//home/CWAO -b amqps://Montreal@pumpWashington/ -to GISC_cache -dr /var/www/???? FIXME # Montreal has a web server running that permits anyone to download the data, including the # administrator of pumpWashington. diff --git a/docs/source/Explanation/History/talks/SarraIntro/script.txt b/docs/source/Explanation/History/talks/SarraIntro/script.txt index 7ed76a582..659328559 100644 --- a/docs/source/Explanation/History/talks/SarraIntro/script.txt +++ b/docs/source/Explanation/History/talks/SarraIntro/script.txt @@ -59,7 +59,7 @@ Layers Bkg,People,F0,Net,Pumps Layers Bkg,People,F0,Net,Pumps,tAl2Aamqpdesc,Al2Aa -Alice uses the sr_post command to say: "I want my frog DNA folder to go to B and F!" +Alice uses the sr3_post command to say: "I want my frog DNA folder to go to B and F!" 
Layers Bkg,People,F0,Net,Pumps,Al2Aa,tAl2Axfer,Al2Axl diff --git a/docs/source/Explanation/History/talks/status_20160427.txt b/docs/source/Explanation/History/talks/status_20160427.txt index b9a58ff55..449766b79 100644 --- a/docs/source/Explanation/History/talks/status_20160427.txt +++ b/docs/source/Explanation/History/talks/status_20160427.txt @@ -42,9 +42,9 @@ For data feeds, we need to feed both the site store 1 (ss1) and site store 2 (ss The sarracenia package has a tool called sr_watch, that uses the Linux kernel inotify feature to efficiently report on when a file has been modified or created. We encountered a few problems: - -- While sr_watch works well for dozens of hundreds of directories, and a few thousand files, when it gets into many thousands of directories, the memory usage can climb to over a gigabyte. + -- While sr3_watch works well for dozens of hundreds of directories, and a few thousand files, when it gets into many thousands of directories, the memory usage can climb to over a gigabyte. - -- The cluster file system in use, IBM's General Parallel File System, does not propagate inotify events to all nodes. In order to monitor a directory on a system with one hundred compute nodes, one must run an sr_watch on every node (consuming one hundred nodes, 1 gigabyte of RAM on each one... blech!) + -- The cluster file system in use, IBM's General Parallel File System, does not propagate inotify events to all nodes. In order to monitor a directory on a system with one hundred compute nodes, one must run an sr3_watch on every node (consuming one hundred nodes, 1 gigabyte of RAM on each one... blech!) While there are some approaches that could be taken to reduce memory usage, we ran out of time, and used an alternate approach. The GPFS has a number of hooks that allow us run queries of the number of files modified in a given tree since at given point in time. The API method for this is termed ''GPFS policies". 
So we periodically run a GPFS policy script and announce the results with sr_post. diff --git a/docs/source/Explanation/SarraPluginDev.rst b/docs/source/Explanation/SarraPluginDev.rst index 7b12ab6ee..cc7364354 100644 --- a/docs/source/Explanation/SarraPluginDev.rst +++ b/docs/source/Explanation/SarraPluginDev.rst @@ -1075,4 +1075,4 @@ which have some examples of such use. .. warning:: - **FIXME**, link to amqplib, or java bindings, and a pointer to the sr_post and sr_report section 7 man pages. + **FIXME**, link to amqplib, or java bindings, and a pointer to the sr3_post and sr_report section 7 man pages. diff --git a/docs/source/How2Guides/Admin.rst b/docs/source/How2Guides/Admin.rst index 22f56b320..485fc3ee1 100644 --- a/docs/source/How2Guides/Admin.rst +++ b/docs/source/How2Guides/Admin.rst @@ -73,17 +73,18 @@ The administrative processes perform validation of postings from sources. Once they are validated, forward the postings to the public exchanges for subscribers to access. The processes that are typically run on a broker: -- sr_audit - purge useless queues, create exchanges and users, set user permissions according to their roles. -- sr_poll - for sources without notification messages, revert to explicit polling for initial injection. -- sr_sarra - various configurations to pull data from other pumps to make it available from the local pump. -- sr_sender - send data to clients or other pumps that cannot pull data (usually because of firewalls.) -- sr_winnow - when there are multiple redundant sources of data, select the first one to arrive, and feed sr_sarra. -- sr_shovel - copy notification messages from pump to another, usually to feed sr_winnow. +- poll - for sources without notification messages, revert to explicit polling for initial injection. +- sarra - various configurations to pull data from other pumps to make it available from the local pump. +- sender - send data to clients or other pumps that cannot pull data (usually because of firewalls.) 
+- winnow - when there are multiple redundant sources of data, select the first one to arrive, and feed sarra. +- shovel - copy notification messages from pump to another, usually to feed winnow. +- flow - for gathering from different sorts of sources. + As for any other user, there may be any number of configurations to set up, and all of them may need to run at once. To do so easily, one can invoke:: - sr start + sr3 start to start all the files with named configurations of each component (sarra, subscribe, winnow, log, etc...) There are two users/roles that need to be set to use a pump. They are the admin and feeder options. @@ -101,8 +102,8 @@ for each account, and the various configuration files would use the appropriate -Housekeeping - sr_audit -~~~~~~~~~~~~~~~~~~~~~~~~ +Housekeeping - sr3 sanity +~~~~~~~~~~~~~~~~~~~~~~~~~ When a client connects to a broker, it creates a queue which is then bound to an exchange. The user can choose to have the client self-destruct when disconnected (*auto-delete*), or it can make @@ -140,11 +141,11 @@ only a single cpu to serve a queue. In such cases, creating multiple configurati (each with their own queue) dividing the traffic among them will allow further improvements in throughput. -sr_winnow is used to suppress duplicates. +winnow is used to suppress duplicates. **Note that the duplicate suppresion cache is local to each instance**. When N instances share a queue, the first time a posting is received, it could be picked by one instance, and if a duplicate one is received it would likely be picked up by another instance. **For effective duplicate suppression with instances**, -one must **deploy two layers of subscribers**. Use a **first layer of subscribers (sr_shovels)** with duplicate +one must **deploy two layers of subscribers**. 
Use a **first layer of subscribers (shovels)** with duplicate suppression turned off and output with *post_exchangeSplit*, which route posts by checksum to a **second layer of subscribers (sr_winnow) whose duplicate suppression caches are active.** @@ -155,18 +156,18 @@ Routing ------- The inter-connection of multiple pumps is done, on the data side, by daisy-chaining -sr_sarra and/or sr_sender configurations from one pump to the next. +sarra and/or sender configurations from one pump to the next. The *to_clusters*, and *source* headers are used for routing decisions implemented in the *msg_to_clusters*, and *msg_by_source* plugins respectively to be user by sender or sarra components to limit data transfers between pumps. For report routing, the *from_cluster* header is interpreted by the -*msg_from_cluster* plugin. Report messages are defined in the sr_report(7) man +*msg_from_cluster* plugin. Report messages are defined in the report(7) man page. They are emitted by *consumers* at the end, as well as *feeders* as the notification messages traverse pumps. Report messages are posted to the xs\_ exchange, and after validation sent to the xreport exchange by the shovel component -configurations created by sr_audit. +configurations created by *sr3 declare.* Messages in xreports destined for other clusters are routed to destinations by manually configured shovels. See the Reports_ section for more details. @@ -175,11 +176,11 @@ manually configured shovels. See the Reports_ section for more details. What is Going On? ----------------- -The sr_report command can be invoked to bind to 'xreport' instead of the +The sr3 declare report command can be invoked to bind to 'xreport' instead of the default user exchange to get report information for an entire broker. -Canned sr_report configuration with an *on_message* action can be configured to +Canned report configuration with an *on_message* action can be configured to gather statisical information. .. 
NOTE:: @@ -571,7 +572,7 @@ To add Alice using sr_audit, one would add the following to ~/.config/sarra/admi then add an appropriate amqp entry in ~/.config/sarra/credentials.conf to set the password, then run:: - sr --users declare + sr3 --users declare To remove users, just remove *declare source Alice* from the admin.conf file, and run:: @@ -649,7 +650,7 @@ Sarra from Another Pump Sarra works by having a downstream pump re-advertise products from an upstream one. Sarra needs all the configuration of a subscription, but also needs the configuration to post to the downstream broker. The feeder account on the broker is used for this sort of work, and is a semi-administrative user, able to publish data to any exchange. Assume apache is set up (not covered here) with a -document root of /var/www/html. The linux account we have created to run all the sr processes is '*sarra*', so we make sure +document root of /var/www/html. The linux account we have created to run all the sr3 processes is '*sarra*', so we make sure the document root is writable to those processes:: sarra@boule:~$ cd ~/.config/sarra/sarra diff --git a/docs/source/How2Guides/source.rst b/docs/source/How2Guides/source.rst index bfdfbc32c..d4cbf83f8 100644 --- a/docs/source/How2Guides/source.rst +++ b/docs/source/How2Guides/source.rst @@ -35,14 +35,14 @@ this one. Regardless of how it is done, injecting data means telling the pump where the data is so that it can be forwarded to and/or by the pump. This can be done by either -using the active and explicit sr_post command, or just using sr_watch on a directory. +using the active and explicit sr3_post command, or just using sr3 watch on a directory. Where there are large numbers of files, and/or tight timeliness constraints, invocation -of sr_post directly by the producer of the file is optimal, as sr_watch may provide +of sr3_post directly by the producer of the file is optimal, as sr3 watch may provide disappointing performance. 
Another explicit, but low frequency approach is the sr_poll command, which allows one to query remote systems to pull data into the network efficiently. -While sr_watch is written as an optimal directory watching system, there simply is no +While sr3 watch is written as an optimal directory watching system, there simply is no quick way to watch large (say, more than 100,000 files) directory trees. On dd.weather.gc.ca, as an example, there are 60 million files in about a million directories. To walk that directory tree once takes several hours. To find new files, @@ -104,10 +104,10 @@ credentials safely:: looking at ssh configuration files. just configure ssh to work, and sarracenia will as well. -So now the command line for sr_post is just the url for ddsr to retrieve the +So now the command line for sr3_post is just the url for ddsr to retrieve the file on grumpy:: - sr_post -post_broker amqp://guest:guest@localhost/ -post_base_dir /var/www/posts/ \ + sr3_post -post_broker amqp://guest:guest@localhost/ -post_base_dir /var/www/posts/ \ -post_base_url http://localhost:81/frog.dna 2016-01-20 14:53:49,014 [INFO] Output AMQP broker(localhost) user(guest) vhost(/) @@ -145,28 +145,28 @@ The command asks ddsr to retrieve the treefrog/frog.dna file by logging in to grumpy as peter (using the pump's private key) to retrieve it, and posting it on the pump, for forwarding to the other pump destinations. -Similar to sr_subscribe, one can also place configuration files in an sr_post specific directory:: +Similar to sr_subscribe, one can also place configuration files in an sr3_post specific directory:: - blacklab% sr_post edit dissem.conf + blacklab% sr3_post edit dissem.conf post_broker amqps://rnd@ddsr.cmc.ec.gc.ca/ post_base_url sftp://peter@grumpy and then:: - sr_post -c dissem -url treefrog/frog.dna + sr3_post -c dissem -url treefrog/frog.dna If there are different varieties of posting used, configurations can be saved for each one. .. 
warning:: **FIXME**: Need to do a real example. this made up stuff isn´t sufficiently helpful. - **FIXME**: sr_post does not accept config files right now, says the man page. True/False? + **FIXME**: sr3_post does not accept config files right now, says the man page. True/False? - sr_post command lines can be a lot simpler if it did. + sr3_post command lines can be a lot simpler if it did. sr_post typically returns immediately as its only job is to advise the pump of the availability -of files. The files are not transferred when sr_post returns, so one should not delete files +of files. The files are not transferred when sr3_post returns, so one should not delete files after posting without being sure the pump actually picked them up. .. NOTE:: @@ -197,7 +197,7 @@ is called *blacklab*, and the user on the server is *peter* running as peter on a directory is created under /var/www/project/outgoing, writable by peter, which results in a configuration like so:: - sr_watch edit project.conf + sr3 edit watch/project.conf broker amqp://feeder@localhost/ url http://blacklab/ @@ -206,15 +206,10 @@ which results in a configuration like so:: Then a watch is started:: - sr_watch start project + sr3 start watch/project -.. warning:: - **FIXME**: real example. - - **FIXME**: sr_watch was supposed to take configuration files, but might not have - been modified to that effect yet. -While sr_watch is running, any time a file is created in the *document_root* directory, +While watch is running, any time a file is created in the *document_root* directory, it will be announced to the pump (on localhost, ie. the server blacklab itself).:: cp frog.dna /var/www/project/outgoing @@ -251,7 +246,7 @@ to use it. Report Messages --------------- -If the sr_post worked, that means the pump accepted to take a look at your file. +If the sr3_post worked, that means the pump accepted to take a look at your file. 
To find out where your data goes to afterward, one needs to examine source log messages. It is also important to note that the initial pump, or any other pump downstream, may refuse to forward your data for various reasons, that will only @@ -423,7 +418,7 @@ to the C library like so:: export SR_POST_CONFIG=somepost.conf export LD_PRELOAD=libsrshim.so.1.0.0 -Where *somepost.conf* is a valid configuration that can be tested with sr_post to manually post a file. +Where *somepost.conf* is a valid configuration that can be tested with sr3_post to manually post a file. Any process invoked from a shell with these settings will have all calls to routines like close(2) intercepted by libsrshim. Libsrshim will check if the file is being written, and then apply the somepost configuration (accept/reject clauses) and post the file if it is appropriate. @@ -461,31 +456,31 @@ Example:: +++ echo 'FIXME: exec above fixes ... builtin i/o like redirection not being posted!' FIXME: exec above fixes ... builtin i/o like redirection not being posted! 
+++ bash -c 'echo "hoho" >>~/test/hoho' - 2017-10-21 20:20:44,092 [INFO] sr_post settings: action=foreground log_level=1 follow_symlinks=no sleep=0 heartbeat=300 cache=0 cache_file=off + 2017-10-21 20:20:44,092 [INFO] sr3_post settings: action=foreground log_level=1 follow_symlinks=no sleep=0 heartbeat=300 cache=0 cache_file=off 2017-10-21 20:20:44,092 [DEBUG] setting to_cluster: localhost 2017-10-21 20:20:44,092 [DEBUG] post_broker: amqp://tsource:@localhost:5672 2017-10-21 20:20:44,094 [DEBUG] connected to post broker amqp://tsource@localhost:5672/#xs_tsource_cpost_watch 2017-10-21 20:20:44,095 [DEBUG] isMatchingPattern: /home/peter/test/hoho matched mask: accept .* 2017-10-21 20:20:44,096 [DEBUG] connected to post broker amqp://tsource@localhost:5672/#xs_tsource_cpost_watch - 2017-10-21 20:20:44,096 [DEBUG] sr_post file2message called with: /home/peter/test/hoho sb=0x7ffef2aae2f0 islnk=0, isdir=0, isreg=1 + 2017-10-21 20:20:44,096 [DEBUG] sr3_post file2message called with: /home/peter/test/hoho sb=0x7ffef2aae2f0 islnk=0, isdir=0, isreg=1 2017-10-21 20:20:44,096 [INFO] published: 20171021202044.096 sftp://peter@localhost /home/peter/test/hoho topic=v02.post.home.peter.test sum=s,a0bcb70b771de1f614c724a86169288ee9dc749a6c0bbb9dd0f863c2b66531d21b65b81bd3d3ec4e345c2fea59032a1b4f3fe52317da3bf075374f7b699b10aa source=tsource to_clusters=localhost from_cluster=localhost mtime=20171021202002.304 atime=20171021202002.308 mode=0644 parts=1,2,1,0,0 +++ /usr/bin/python2.7 pyiotest - 2017-10-21 20:20:44,105 [INFO] sr_post settings: action=foreground log_level=1 follow_symlinks=no sleep=0 heartbeat=300 cache=0 cache_file=off + 2017-10-21 20:20:44,105 [INFO] sr3_post settings: action=foreground log_level=1 follow_symlinks=no sleep=0 heartbeat=300 cache=0 cache_file=off 2017-10-21 20:20:44,105 [DEBUG] setting to_cluster: localhost 2017-10-21 20:20:44,105 [DEBUG] post_broker: amqp://tsource:@localhost:5672 2017-10-21 20:20:44,107 [DEBUG] connected to post broker 
amqp://tsource@localhost:5672/#xs_tsource_cpost_watch 2017-10-21 20:20:44,107 [DEBUG] isMatchingPattern: /home/peter/src/sarracenia/c/hoho matched mask: accept .* 2017-10-21 20:20:44,108 [DEBUG] connected to post broker amqp://tsource@localhost:5672/#xs_tsource_cpost_watch - 2017-10-21 20:20:44,108 [DEBUG] sr_post file2message called with: /home/peter/src/sarracenia/c/hoho sb=0x7ffeb02838b0 islnk=0, isdir=0, isreg=1 + 2017-10-21 20:20:44,108 [DEBUG] sr3_post file2message called with: /home/peter/src/sarracenia/c/hoho sb=0x7ffeb02838b0 islnk=0, isdir=0, isreg=1 2017-10-21 20:20:44,108 [INFO] published: 20171021202044.108 sftp://peter@localhost /c/hoho topic=v02.post.c sum=s,9b71d224bd62f3785d96d46ad3ea3d73319bfbc2890caadae2dff72519673ca72323c3d99ba5c11d7c7acc6e14b8c5da0c4663475c2e5c3adef46f73bcdec043 source=tsource to_clusters=localhost from_cluster=localhost mtime=20171021202044.101 atime=20171021202002.320 mode=0644 parts=1,5,1,0,0 +++ cp libsrshim.c /home/peter/test/hoho_my_darling.txt - 2017-10-21 20:20:44,112 [INFO] sr_post settings: action=foreground log_level=1 follow_symlinks=no sleep=0 heartbeat=300 cache=0 cache_file=off + 2017-10-21 20:20:44,112 [INFO] sr3_post settings: action=foreground log_level=1 follow_symlinks=no sleep=0 heartbeat=300 cache=0 cache_file=off 2017-10-21 20:20:44,112 [DEBUG] setting to_cluster: localhost 2017-10-21 20:20:44,112 [DEBUG] post_broker: amqp://tsource:@localhost:5672 2017-10-21 20:20:44,114 [DEBUG] connected to post broker amqp://tsource@localhost:5672/#xs_tsource_cpost_watch 2017-10-21 20:20:44,114 [DEBUG] isMatchingPattern: /home/peter/test/hoho_my_darling.txt matched mask: accept .* 2017-10-21 20:20:44,115 [DEBUG] connected to post broker amqp://tsource@localhost:5672/#xs_tsource_cpost_watch - 2017-10-21 20:20:44,115 [DEBUG] sr_post file2message called with: /home/peter/test/hoho_my_darling.txt sb=0x7ffc8250d950 islnk=0, isdir=0, isreg=1 + 2017-10-21 20:20:44,115 [DEBUG] sr3_post file2message called with: 
/home/peter/test/hoho_my_darling.txt sb=0x7ffc8250d950 islnk=0, isdir=0, isreg=1 2017-10-21 20:20:44,116 [INFO] published: 20171021202044.115 sftp://peter@localhost /home/peter/test/hoho_my_darling.txt topic=v02.post.home.peter.test sum=s,f5595a47339197c9e03e7b3c374d4f13e53e819b44f7f47b67bf1112e4bd6e01f2af2122e85eda5da633469dbfb0eaf2367314c32736ae8aa7819743f1772935 source=tsource to_clusters=localhost from_cluster=localhost mtime=20171021202044.109 atime=20171021202002.328 mode=0644 parts=1,15117,1,0,0 blacklab% diff --git a/docs/source/How2Guides/v2ToSr3.rst b/docs/source/How2Guides/v2ToSr3.rst index 051371255..c652ff5af 100644 --- a/docs/source/How2Guides/v2ToSr3.rst +++ b/docs/source/How2Guides/v2ToSr3.rst @@ -37,7 +37,7 @@ and implement, and are more flexible and powerful than the v2 mechanism. * v3 uses standard python syntax, not v2's strange *self.plugins*, *parent.logger*, and oh gee why doesn't *import* work? - * Standard python imports; Syntax errors are detected and reported *the normal way* + * Standard python imports: In v3, syntax errors are detected and reported *the normal way* * v3 classes are designed to be usable outside the CLI itself (see jupyter notebook examples) callable by application programmers in their own code, like any other python library. * v3 classes can be sub-classed to add core functionality, like new notification message or file @@ -71,7 +71,7 @@ In sr3, one can also use file globbing style specifications to ask for a command to be invoked on a group of configurations, wheras in v2, one could only operate on one at a time. .. caution:: - **sr3_post** is an exception to this change in that it works like v2's sr_post did, being + **sr3_post** is an exception to this change in that it works like v2's sr_post did, being a tool for interactive posting. @@ -333,6 +333,7 @@ In general, v3 plugins: msg.parts msg['size'] just omit, use sarracenia.Message constructor.
msg.sumflg msg['identity'] just omit, use sarracenia.Message constructor. msg.sumstr v2wrapper.sumstrFromMessage(msg) the literal string for a v2 checksum field. + msg.rename msg['rename'] In sr3, often better to use retrievePath and relPath parent.msg worklist.incoming v2 is 1 message at a time, sr3 has lists or messages. ================ ================================== ========================================================== @@ -426,7 +427,7 @@ In general, v3 plugins: The checksum is already performed when the new notification message is being generated so most likely any message fields such as **sumalgo** and other **algo** fields can be discarded. - For an example of using the message builder, look at `do_poll -> poll`_ + For an example of using the message builder, look at `do_poll -> poll or gather`_ * v3 plugins **rarely, involve subclassing of moth or transfer classes.** @@ -603,8 +604,8 @@ examples: -do_poll -> poll -~~~~~~~~~~~~~~~ +do_poll -> poll or gather +~~~~~~~~~~~~~~~~~~~~~~~~~ v2: call do_poll from plugin. @@ -617,8 +618,22 @@ v2: call do_poll from plugin. to do in each plugin. * poll_without_vip setting available. * parent.pulls is a list of *get* directives (which are different from accept) - * often paired with download\_something plugins where a partial message is built with the poll - and the download one is specialized to to the actual download. + +There is a common pattern in v2 polls, where a do_poll is paired with download\_something plugins +where a partial message is built with the poll and the download (or do_download) one is specialized +to do the actual download. Often in sr3 one can craft a message that will be successfully downloaded +with the built-in processing. 
+ +An example of custom download processing is to build the directory tree to download into, combined with +the use of a *rename* header (in v2 parent.msg.rename). One can now use "retrievePath" to define the url +to issue to the server, and "relPath" to define where it will be downloaded to. *RelPath* includes +the whole directory tree, where *rename* is only for the filename. The combination of *relPath* and +*retrievePath* often provides enough functionality to obviate the need for a download entry point. + +There is another common pattern in v2 polls where, rather than querying a remote server to find out +what new products are available, a fixed list of requests is issued periodically. In sr3, this is the +concept of a scheduled flow. See `Scheduled Flow`_ for more on that. For typical polls, the migration +to sr3 follows: v3: define poll in a flowcb class. @@ -640,6 +655,8 @@ v3: define poll in a flowcb class. * returns a list of notification messages to be filtered and posted. + * the *download* setting allows a poll to download in a single configuration without + requiring combination with a separate downloading configuration. To build a notification message, without a local file, use fromFileInfo sarracenia.message factory:: @@ -682,7 +699,6 @@ and at the end:: return gathered_messages - Virtual IP processing in poll ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -700,6 +716,31 @@ to update their recent_files cache. examples: * flowcb/poll/airnow.py +In a v2 poll, output exchanges were sometimes quite popular exchanges (e.g. xpublic) +which would cause the duplicate_suppression queues in an sr3 poll to be much +larger than necessary. + +When using a poll in sr3, ideally the post_exchange is one dedicated to this +poll, so that the vip participants prime their duplicate suppression cache with +only items published by the poll.
+ + + +Scheduled Flow +~~~~~~~~~~~~~~ + +If there is a WISKI ( https://www.kisters.net/wiski ) server, one needs to issue +time centric queries at regular intervals, so a *gather()* entry point is implemented +which returns a list of messages that a downloader will use to obtain the data. + +* https://github.com/MetPX/sarracenia/blob/development/sarracenia/examples/flow/opg.conf an example flow configuration for polling Ontario Power Generation sensors. + +* https://github.com/MetPX/sarracenia/blob/development/sarracenia/flowcb/scheduled/wiski.py The plugin used by the OPG configuration using the gather() entry point. + +Like a poll, one can use the *download* option to consume the messages by downloading in the same configuration, +or publish to an exchange for downloading by a separate subscriber or sarra to scale downloading. + + on_html_page -> subclass flowcb/poll ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -925,6 +966,11 @@ create a flowCallback class with a *download* entry point. fields, and normal processing of the *retrievePath* field in the notification message will do a good download, no plugin required. +* In many poll situations (typically a plugin with a do_poll and do_download entry point), the sr3 + built-in downloading often "just works", the sr3 poll() or gather() entry point is typically configured + with a *retrievePath* to indicate the URL to get, and the relPath is set to indicate the file name + to download into. + DESTFNSCRIPT ~~~~~~~~~~~~ diff --git a/docs/source/Reference/sr_post.7.rst b/docs/source/Reference/sr_post.7.rst index 17f70c52f..dcb352820 100644 --- a/docs/source/Reference/sr_post.7.rst +++ b/docs/source/Reference/sr_post.7.rst @@ -43,7 +43,7 @@ SYNOPSIS Version 03 format of file change notification messages for sr_post.
-An sr_post notification message consists of a topic, and the *BODY* +An sr3_post notification message consists of a topic, and the *BODY* **AMQP Topic:** *.{.}* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -595,9 +595,9 @@ EXAMPLE Another example --------------- -The post resulting from the following sr_watch command, noticing creation of the file 'foo':: +The post resulting from the following sr3 watch command, noticing creation of the file 'foo':: - sr_watch -pbu sftp://stanley@mysftpserver.com/ -path /data/shared/products/foo -pb amqp://broker.com + sr3_post --sleep 10 --pbu sftp://stanley@mysftpserver.com/ --path /data/shared/products/foo --pb amqp://broker.com Here, *sr_watch* checks if the file /data/shared/products/foo is modified. When it happens, *sr_watch* reads the file /data/shared/products/foo and calculates its checksum. diff --git a/docs/source/Tutorials/1_CLI_introduction.ipynb b/docs/source/Tutorials/1_CLI_introduction.ipynb index 405f291eb..179791c7f 100644 --- a/docs/source/Tutorials/1_CLI_introduction.ipynb +++ b/docs/source/Tutorials/1_CLI_introduction.ipynb @@ -7,7 +7,7 @@ "source": [ "# Downloading Using the Command Line\n", "\n", - "This [jupyter notebook](https://jupyter.org) introduces [Sarracenia version 3](https://metpx.github.io/sarracenia) usage from the command line (mostly on Linux, but should be similar on Windows and Mac also, main difference being different conventions for where preferences and logs are stored. This is probably the easiest way to work with Sarracenia. You configure a flow to download files into a directory, and you can read the directory to process the files there.\n" + "This [jupyter notebook](https://jupyter.org) introduces [Sarracenia version 3](https://metpx.github.io/sarracenia) usage from the command line (mostly on Linux, but should be similar on Windows and Mac also, main difference being different conventions for where preferences and logs are stored.) This is probably the easiest way to work with Sarracenia. 
You configure a flow to download files into a directory, and you can read the directory to process the files there.\n" ] }, { @@ -45,7 +45,7 @@ "\n", "The command line interface is called [sr3](../Reference/sr3.1.rst) (short for Sarracenia version 3). One defines\n", "flows to run using configuration files in a simple format: _keyword_ _value_ format.\n", - "there are example configurations to get you started:" + "There are example configurations to get you started:" ] }, { @@ -104,7 +104,7 @@ "id": "affecting-marking", "metadata": {}, "source": [ - "There are different kinds for flows: the examples are classified flow type (poll, post, sarra, sender, shovel...)\n", + "There are different kinds of flows: the examples are classified by flow type (poll, post, sarra, sender, shovel, etc.)\n", "A _subscribe_ is used by clients to download from a data pump. Let's pick one of those." ] }, @@ -134,15 +134,15 @@ "id": "overall-instruction", "metadata": {}, "source": [ - "The files that are active for you are place in ~/.config/sr3//config_name. You can browse there \n", - "and modify them with an editor if you like. You can do that also with _sr3 edit subscribe/hpfx_amis.conf_\n", + "The files that are active for you are placed in ~/.config/sr3/{{}}/config_name. You can browse there \n", + "and modify them with an editor if you like. You can also do that with _sr3 edit subscribe/hpfx_amis.conf_.\n", "\n", " # this is a feed of wmo bulletin (a set called AMIS in the old times)\n", "\n", " broker amqps://hpfx.collab.science.gc.ca/\n", " exchange xpublic\n", "\n", - " # instances: number of downloading processes to run at once. defaults to 1. Not enough for this case\n", + " # instances: number of downloading processes to run at once. Defaults to 1.
Not enough for this case\n", " instances 5\n", " \n", " # expire, in operational use, should be longer than longest expected interruption\n", @@ -153,7 +153,7 @@ " mirror false\n", " directory /tmp/hpfx_amis/\n", "\n", - "added the messageCountMax, so it doesn't run forever." + "Add the messageCountMax, so it doesn't run forever:" ] }, { @@ -173,7 +173,7 @@ "metadata": {}, "source": [ "The root directory where files are to be placed needs to exist before you start.\n", - "the above commands are to configure on a linux machine, you might need something else on a mac or windows.\n", + "The above commands are to configure on a Linux machine, you might need something else on a mac or windows.\n", "\n", "You can then run a flow interactively with the _foreground_ action, and it will end quickly, like so:" ] @@ -273,7 +273,7 @@ "id": "foreign-european", "metadata": {}, "source": [ - "as you can see it downloaded five files to /tmp/amis.\n", + "As you can see, it downloaded five files to /tmp/amis.\n", "The _foreground_ action is intended to help with debugging, rather than real operations." ] }, @@ -312,7 +312,7 @@ "id": "rocky-unemployment", "metadata": {}, "source": [ - "There is 1 configuration in your list. You can have hundreds. The columns on the right refer to how many instances you have for each configuration. In the example above _instances_ is set to 5, so one would expect to see 5 running instances when it would be running. You can start specifc configuration with _sr3 start subscribe/*_ or start all active instances with: _sr3 start_" + "Above, you can see there is 1 configuration in your list. You can have hundreds. The columns on the right refer to how many instances you have for each configuration. In the example above, _instances_ is set to 5, so one would expect to see 5 running instances when it would be running. 
You can start specific configurations with _sr3 start subscribe/*_ or start all active instances with: _sr3 start_" ] }, { @@ -344,11 +344,11 @@ "id": "leading-matthew", "metadata": {}, "source": [ - "When running in the background, output needs to go a log file. As we have only run this configuration file in the foreground, asking to see the log prints an error about the log being missing. This tells you that the logs are in the _~/.cache/sr3/log_ directory. Logs can be monitored in real-time with traditional tools such as _tail -f_ or _grep_.\n", + "When running in the background, output needs to go to a log file. Since we have only run this configuration file in the foreground, asking to see the log prints an error about the log being missing. This tells you that the logs are in the _~/.cache/sr3/log_ directory. Logs can be monitored in real-time with traditional tools such as _tail -f_ or _grep_.\n", "\n", "_sr3 stop_ does what you expect.\n", "\n", "Processes can crash. In the _sr3 status_ output above, if the number of processes in the Run column is less than in the Exp (for Expected) one, then it means that some instances have crashed.
You can repair it (just start the missing instances) with:\n", "\n", "_sr3 sanity_ -- start missing instances, also kill strays if any found.\n", "\n", @@ -357,22 +357,13 @@ "\n", "## Conclusion\n", "\n", - "If all you want to do is obtain data from a data pump in real-time, using the command line interface to control some processes that run all the time, so that they dump files in a certain directory is the easiest way to go.\n", + "If all you want to do is obtain data from a data pump in real-time, the easiest way to go is using the command line interface to control some processes that run all the time so that they dump files in a certain directory.\n", "\n", - "It isn't very efficient though. When you have large numbers of files to work with, and you want high speed processing, it is better, in the sense of lower cpu and i/o overhead, and in terms of speed of processing,\n", - "to have your own application informed of the arrival of files, rather than scanning a directory.\n", + "It isn't very efficient though. When dealing with a large number of files and aiming for high-speed processing, it’s more efficient to have your own application receive notifications about file arrivals rather than scanning a directory. This approach reduces CPU and I/O overhead while improving processing speed.\n", "\n", "The easiest way to do that is to add some callbacks to your flows. We'll cover that next." 
] }, - { - "cell_type": "code", - "execution_count": null, - "id": "artistic-purple", - "metadata": {}, - "outputs": [], - "source": [] - } ], "metadata": { "kernelspec": { diff --git a/docs/source/Tutorials/Setup_a_local_subscriber.rst b/docs/source/Tutorials/Setup_a_local_subscriber.rst index 10343b45f..af2d00a7b 100644 --- a/docs/source/Tutorials/Setup_a_local_subscriber.rst +++ b/docs/source/Tutorials/Setup_a_local_subscriber.rst @@ -1,7 +1,10 @@ ================================ Server Admin: A Local Subscriber ================================ -This example goes over how to subscribe to the swob files from the Environment Canada Weather office. + +This example goes over how to build a local pump, with a local broker, +subscribe to the swob files from the Environment Canada Weather office, +and republish them locally. :: @@ -30,7 +33,7 @@ Set user permissions for vhost for bob's configure:read:write:: Setting tags for user "bob" to [management] ... $ sudo rabbitmq-plugins enable rabbitmq_management - $ /etc/init.d/rabbitmq-server restart + $ systemctl restart rabbitmq-server For more on the different kinds of user tags, see `rabbitmq access and permissions. `_ Open http://localhost:15672/ in a web browser. @@ -38,7 +41,7 @@ Log in with the username/password created above. Click the ``Queues`` tab to monitor the progress from the broker's perspective. 
Back in terminal:: - $ mkdir .config/sarra/subscribe + $ mkdir -p .config/sarra/subscribe $ vi .config/sarra/subscribe/test-subscribe.conf broker amqp://bob:robert@localhost/ exchange xs_bob @@ -47,7 +50,7 @@ Back in terminal:: Setup the bits that post changes to the exchange:: - $ mkdir .config/sarra/watch + $ mkdir -p .config/sarra/watch $ vi $_/test-watch.conf post_broker amqp://bob:robert@localhost/ post_exchange xs_bob @@ -55,12 +58,12 @@ Setup the bits that post changes to the exchange:: events modify,create $ mkdir -p /tmp/sarra/{in,out}put - $ sr start - $ sr_watch log test-watch + $ sr3 start + $ sr3 log watch/test-watch --> All reporting normal.:: - $ sr_subscribe log test-subscribe + $ sr3 log subscribe/test-subscribe . . 2020-08-20 16:29:26,111 [ERROR] standard queue name based on: @@ -70,22 +73,22 @@ Setup the bits that post changes to the exchange:: no=1 --> Note the line with **[ERROR]**, it was unable to find the queue. -this is because the queue needs to first be created by sr_watch and since we started the +this is because the queue needs to first be created by the watch and since we started the subscriber and watch at the same time with '``sr start``' we ran into a small race condition. This was soon after resolved as the sr_subscribe has a 1 second retry time. -This can be confirmed with the 'RabbitMQ Queues' page showing a ``q_bob.sr_subscribe.test_subscribe. ...`` queue in the list.:: +This can be confirmed with the 'RabbitMQ Queues' page showing a ``q_bob.subscribe.test-subscribe. ...`` queue in the list.:: $ touch /tmp/sarra/input/testfile1.txt $ ls /tmp/sarra/input/ testfile1.txt $ ls /tmp/sarra/output/ testfile1.txt - $ sr_subscribe log test-subscribe + $ sr3 log subscribe/test-subscribe . . 
2020-08-20 16:29:26,078 [INFO] file_log downloaded to: /tmp/sarra/output/testfile1.txt - $ sr_watch log test-watch + $ sr3 log watch/test-watch 2020-08-20 16:29:20,612 [INFO] post_log notice=20200820212920.611807823 file:/ /tmp/sarra/input/testfile1.txt headers={'to_clusters':'localhost', 'mtime':'20200820212920.0259232521', 'atime': '20200820212920.0259232521', 'mode': '644', 'parts': '1,0,1,0,0', 'sum':'d,d41d8cd98f00b204e9800998ecf8427e'} $ touch /tmp/sarra/input/testfile{2..9}.txt @@ -96,9 +99,9 @@ Now you can watch the files trickle into the output folder, also watch the 'RabbitMQ Queues' page receive and process AMQP messages. When all is completed you can shut down both the subscriber and watcher with:: - $ sr stop + $ sr3 stop ... - $ sr_subscribe cleanup test-subscribe + $ sr3 cleanup subscribe/test-subscribe ... Now the queue has been deleted from RabbitMQ and all services have been stopped. diff --git a/docs/source/Tutorials/Setup_a_remote_subscriber.rst b/docs/source/Tutorials/Setup_a_remote_subscriber.rst index e180e449d..a73818bff 100644 --- a/docs/source/Tutorials/Setup_a_remote_subscriber.rst +++ b/docs/source/Tutorials/Setup_a_remote_subscriber.rst @@ -14,7 +14,7 @@ Initialize the credentials storage in the `~/.config/sr3/credentials.conf` file: The format is a complete url on each line (`amqps://:@`). This credentials.conf file should be private (linux octal permissions: 0600). -.conf files placed in the ``~/.config/sr3/subscribe_directory`` will be automatically found by ``sr_subscribe``, rather than giving the full path. +.conf files placed in the ``~/.config/sr3/subscribe_directory`` will be automatically found by ``subscribe``, rather than giving the full path. 
The *edit* command starts the user's configured editor on the file to be created, in the correct directory:: @@ -25,7 +25,7 @@ The *edit* command starts the user's configured editor on the file to be created accept .* $ mkdir /tmp/swob_downloads $ sr3 status subscribe/swob - 2017-12-14 06:54:54,010 [INFO] sr_subscribe swob 01 is stopped + 2017-12-14 06:54:54,010 [INFO] subscribe swob 01 is stopped .. ERROR:: @@ -56,26 +56,26 @@ Now start up the newly created subscriber:: $ sr3 start swob 2015-12-03 06:53:35,268 [INFO] user_config = 0 ../swob.conf 2015-12-03 06:53:35,269 [INFO] instances 1 - 2015-12-03 06:53:35,270 [INFO] sr subscribe swob 0001 started + 2015-12-03 06:53:35,270 [INFO] sr3 subscribe swob 0001 started Activity can be monitored via log files in ``~/.cache/sr3/log/`` or with the *log* command:: $ sr3 log swob - 2015-12-03 06:53:35,635 [INFO] Binding queue q_anonymous.sr_subscribe.swob.21096474.62787751 with key v02.post.observations.swob-ml.# to exchange xpublic on broker amqps://anonymous@dd.weather.gc.ca/ + 2015-12-03 06:53:35,635 [INFO] Binding queue q_anonymous.subscribe.swob.21096474.62787751 with key v02.post.observations.swob-ml.# to exchange xpublic on broker amqps://anonymous@dd.weather.gc.ca/ 2015-12-03 17:32:01,834 [INFO] user_config = 1 ../swob.conf - 2015-12-03 17:32:01,835 [INFO] sr_subscribe start - 2015-12-03 17:32:01,835 [INFO] sr_subscribe run + 2015-12-03 17:32:01,835 [INFO] subscribe start + 2015-12-03 17:32:01,835 [INFO] subscribe run 2015-12-03 17:32:01,835 [INFO] AMQP broker(dd.weather.gc.ca) user(anonymous) vhost(/) 2015-12-03 17:32:01,835 [INFO] AMQP input : exchange(xpublic) topic(v02.post.observations.swob-ml.#) 2015-12-03 17:32:01,835 [INFO] AMQP output: exchange(xs_anonymous) topic(v02.report.#) - 2015-12-03 17:32:08,191 [INFO] Binding queue q_anonymous.sr_subscribe.swob.21096474.62787751 with key v02.post.observations.swob-ml.# to exchange xpublic on broker amqps://anonymous@dd.weather.gc.ca/ + 2015-12-03 17:32:08,191 
[INFO] Binding queue q_anonymous.subscribe.swob.21096474.62787751 with key v02.post.observations.swob-ml.# to exchange xpublic on broker amqps://anonymous@dd.weather.gc.ca/ ``[Ctrl] + [C]`` to exit watching the logs. The startup log appears normal, indicating the authentication information was accepted. -``sr_subscribe`` will get the notification and download the file into the present working directory +``Subscribe`` will get the notification and download the file into the present working directory (unless otherwise specified in the configuration file). ---- @@ -94,7 +94,7 @@ Here is a failure:: 2015-12-03 17:32:30,786 [ERROR] Download failed http://dd2.weather.gc.ca/observations/swob-ml/20151203/CXFB/2015-12-03-2200-CXFB-AUTO-swob.xml 2015-12-03 17:32:30,787 [ERROR] Server couldn't fulfill the request. Error code: 404, Not Found -This message is not always a failure as ``sr_subscribe`` retries a few times before giving up. +This message is not always a failure as ``subscribe`` retries a few times before giving up. 
After a few minutes, here is what the download directory looks like:: $ ls -al | tail @@ -107,25 +107,25 @@ After a few minutes, here is what the download directory looks like:: -rw-rw-rw- 1 peter peter 7873 Dec 3 17:37 2015-12-03-2237-CL3G-AUTO-minute-swob.xml -rw-rw-rw- 1 peter peter 7037 Dec 3 17:37 2015-12-03-2237-CTBF-AUTO-minute-swob.xml -rw-rw-rw- 1 peter peter 7022 Dec 3 17:37 2015-12-03-2237-CTRY-AUTO-minute-swob.xml - -rw-rw-rw- 1 peter peter 122140 Dec 3 17:38 sr_subscribe_dd_swob_0001.log + -rw-rw-rw- 1 peter peter 122140 Dec 3 17:38 subscribe_dd_swob_0001.log Cleanup ~~~~~~~ To not download more files, stop the subscriber:: - $ sr_subscribe stop swob - 2015-12-03 17:32:22,219 [INFO] sr_subscribe swob 01 stopped + $ sr3 stop subscribe/swob + 2015-12-03 17:32:22,219 [INFO] subscribe swob 01 stopped -This however leaves the queue that ``sr_subscribe start`` setup on the broker active, +This however leaves the queue that ``sr3 start subscribe/swob`` setup on the broker active, as to allow a failed subscriber to attempt reconnecting without loosing progress. That is until the broker times out the queue and removes it. To tell the broker that we are finished with the queue, tell the subscriber to cleanup:: - $ sr_subscribe cleanup swob - 2015-12-03 17:32:22,008 [INFO] sr_subscribe swob cleanup + $ sr3 cleanup subscribe/swob + 2015-12-03 17:32:22,008 [INFO] subscribe swob cleanup 2015-12-03 17:32:22,008 [INFO] AMQP broker(dd.weatheer.gc.ca) user(anonymous) vhost() 2015-12-03 17:32:22,008 [INFO] Using amqp module (AMQP 0-9-1) - 2015-12-03 17:32:22,008 [INFO] deleting queue q_anonymous.sr_subscribe.swob.21096474.62787751 (anonymous@dd.weather.gc.ca) + 2015-12-03 17:32:22,008 [INFO] deleting queue q_anonymous.subscribe.swob.21096474.62787751 (anonymous@dd.weather.gc.ca) Best practice is to clear the queue when done as to lessen the load on the broker. 
diff --git a/docs/source/Tutorials/hello_world_server.txt b/docs/source/Tutorials/hello_world_server.txt index ed3d61fb4..5c0bf43b4 100644 --- a/docs/source/Tutorials/hello_world_server.txt +++ b/docs/source/Tutorials/hello_world_server.txt @@ -25,7 +25,7 @@ cat >rawall.conf <>~/test/hoho' - 2017-10-21 20:20:44,092 [INFO] sr_post settings: action=foreground log_level=1 follow_symlinks=no sleep=0 heartbeat=300 cache=0 cache_file=off + 2017-10-21 20:20:44,092 [INFO] sr3_post settings: action=foreground log_level=1 follow_symlinks=no sleep=0 heartbeat=300 cache=0 cache_file=off 2017-10-21 20:20:44,092 [DEBUG] setting to_cluster: localhost 2017-10-21 20:20:44,092 [DEBUG] post_broker: amqp://tsource:@localhost:5672 2017-10-21 20:20:44,094 [DEBUG] connected to post broker amqp://tsource@localhost:5672/#xs_tsource_cpost_watch 2017-10-21 20:20:44,095 [DEBUG] isMatchingPattern: /home/peter/test/hoho matched mask: accept .* 2017-10-21 20:20:44,096 [DEBUG] connected to post broker amqp://tsource@localhost:5672/#xs_tsource_cpost_watch - 2017-10-21 20:20:44,096 [DEBUG] sr_post file2message called with: /home/peter/test/hoho sb=0x7ffef2aae2f0 islnk=0, isdir=0, isreg=1 + 2017-10-21 20:20:44,096 [DEBUG] sr3_post file2message called with: /home/peter/test/hoho sb=0x7ffef2aae2f0 islnk=0, isdir=0, isreg=1 2017-10-21 20:20:44,096 [INFO] published: 20171021202044.096 sftp://peter@localhost /home/peter/test/hoho topic=v02.post.home.peter.test sum=s,a0bcb70b771de1f614c724a86169288ee9dc749a6c0bbb9dd0f863c2b66531d21b65b81bd3d3ec4e345c2fea59032a1b4f3fe52317da3bf075374f7b699b10aa source=tsource to_clusters=localhost from_cluster=localhost mtime=20171021202002.304 atime=20171021202002.308 mode=0644 parts=1,2,1,0,0 +++ /usr/bin/python2.7 pyiotest - 2017-10-21 20:20:44,105 [INFO] sr_post settings: action=foreground log_level=1 follow_symlinks=no sleep=0 heartbeat=300 cache=0 cache_file=off + 2017-10-21 20:20:44,105 [INFO] sr3_post settings: action=foreground log_level=1 follow_symlinks=no 
sleep=0 heartbeat=300 cache=0 cache_file=off 2017-10-21 20:20:44,105 [DEBUG] setting to_cluster: localhost 2017-10-21 20:20:44,105 [DEBUG] post_broker: amqp://tsource:@localhost:5672 2017-10-21 20:20:44,107 [DEBUG] connected to post broker amqp://tsource@localhost:5672/#xs_tsource_cpost_watch 2017-10-21 20:20:44,107 [DEBUG] isMatchingPattern: /home/peter/src/sarracenia/c/hoho matched mask: accept .* 2017-10-21 20:20:44,108 [DEBUG] connected to post broker amqp://tsource@localhost:5672/#xs_tsource_cpost_watch - 2017-10-21 20:20:44,108 [DEBUG] sr_post file2message called with: /home/peter/src/sarracenia/c/hoho sb=0x7ffeb02838b0 islnk=0, isdir=0, isreg=1 + 2017-10-21 20:20:44,108 [DEBUG] sr3_post file2message called with: /home/peter/src/sarracenia/c/hoho sb=0x7ffeb02838b0 islnk=0, isdir=0, isreg=1 2017-10-21 20:20:44,108 [INFO] published: 20171021202044.108 sftp://peter@localhost /c/hoho topic=v02.post.c sum=s,9b71d224bd62f3785d96d46ad3ea3d73319bfbc2890caadae2dff72519673ca72323c3d99ba5c11d7c7acc6e14b8c5da0c4663475c2e5c3adef46f73bcdec043 source=tsource to_clusters=localhost from_cluster=localhost mtime=20171021202044.101 atime=20171021202002.320 mode=0644 parts=1,5,1,0,0 +++ cp libsrshim.c /home/peter/test/hoho_my_darling.txt - 2017-10-21 20:20:44,112 [INFO] sr_post settings: action=foreground log_level=1 follow_symlinks=no sleep=0 heartbeat=300 cache=0 cache_file=off + 2017-10-21 20:20:44,112 [INFO] sr3_post settings: action=foreground log_level=1 follow_symlinks=no sleep=0 heartbeat=300 cache=0 cache_file=off 2017-10-21 20:20:44,112 [DEBUG] setting to_cluster: localhost 2017-10-21 20:20:44,112 [DEBUG] post_broker: amqp://tsource:@localhost:5672 2017-10-21 20:20:44,114 [DEBUG] connected to post broker amqp://tsource@localhost:5672/#xs_tsource_cpost_watch 2017-10-21 20:20:44,114 [DEBUG] isMatchingPattern: /home/peter/test/hoho_my_darling.txt matched mask: accept .* 2017-10-21 20:20:44,115 [DEBUG] connected to post broker 
amqp://tsource@localhost:5672/#xs_tsource_cpost_watch - 2017-10-21 20:20:44,115 [DEBUG] sr_post file2message called with: /home/peter/test/hoho_my_darling.txt sb=0x7ffc8250d950 islnk=0, isdir=0, isreg=1 + 2017-10-21 20:20:44,115 [DEBUG] sr3_post file2message called with: /home/peter/test/hoho_my_darling.txt sb=0x7ffc8250d950 islnk=0, isdir=0, isreg=1 2017-10-21 20:20:44,116 [INFO] published: 20171021202044.115 sftp://peter@localhost /home/peter/test/hoho_my_darling.txt topic=v02.post.home.peter.test sum=s,f5595a47339197c9e03e7b3c374d4f13e53e819b44f7f47b67bf1112e4bd6e01f2af2122e85eda5da633469dbfb0eaf2367314c32736ae8aa7819743f1772935 source=tsource to_clusters=localhost from_cluster=localhost mtime=20171021202044.109 atime=20171021202002.328 mode=0644 parts=1,15117,1,0,0 blacklab% diff --git a/docs/source/fr/CommentFaire/subscriber.rst b/docs/source/fr/CommentFaire/subscriber.rst index 4e84ac8a0..79cbd17dc 100644 --- a/docs/source/fr/CommentFaire/subscriber.rst +++ b/docs/source/fr/CommentFaire/subscriber.rst @@ -242,8 +242,8 @@ ce répertoire : dd_amis.conf et hpfx_amis.conf, on pourrait alors exécuter :: fractal% pour démarrer la configuration de téléchargement CMC. On peut utiliser -la commande sr pour démarrer/arrêter plusieurs configurations à la fois. -La commande sr passera par les répertoires par défaut et démarrera +la commande sr3 pour démarrer/arrêter plusieurs configurations à la fois. +La commande sr3 passera par les répertoires par défaut et démarrera toutes les configurations qu’y si trouve :: fractal% sr3 status diff --git a/docs/source/fr/CommentFaire/v2ASr3.rst b/docs/source/fr/CommentFaire/v2ASr3.rst index d8a827d5a..25288354b 100644 --- a/docs/source/fr/CommentFaire/v2ASr3.rst +++ b/docs/source/fr/CommentFaire/v2ASr3.rst @@ -70,7 +70,7 @@ Dans sr3, on peut également utiliser des spécifications de style de globbing d soit invoqué sur un groupe de configurations, alors que dans la v2, on ne pouvait fonctionner que sur une à la fois. .. 
note:: - **sr3_post** est une exception à ce changement parce qu'il fonctionne comme sr_post de la v2, étant + **sr3_post** est une exception à ce changement parce qu'il fonctionne comme sr_post de la v2, étant un outil d’affichage interactif. Ce qui fonctionnera sans changement @@ -281,9 +281,9 @@ En général, les plugins v3: Chaque message de notification v3 agit comme un dictionnaire python. Ci-dessous un mappage de table champs de la représentation sarra v2 à celle de sr3 : - ================ =================== =========================================================== + ================ =================== ============================================================= v2 sr3 Notes - ================ =================== =========================================================== + ================ =================== ============================================================= msg.pubtime msg['pubTime'] quand le message a été initialement publié msg.baseurl msg['baseUrl'] racine de l'arborescence url du fichier annoncé.
msg.relpath msg['relPath'] chemin relatif concaténé à baseUrl pour le chemin canonique @@ -299,8 +299,9 @@ En général, les plugins v3: msg.logger logger les journeaux fonctionnent ¨normalement" pour python msg.parts msg['size'] oublie ca, utilise une constructeur de sarracenia.Message msg.sumflg msg['identity'] oublie ca, utilise une constructeur de sarracenia.Message + msg.rename msg['rename'] En sr3, souvent mieux d'utiliser: *retrievePath* et *relPath* parent.msg worklist.incoming sr3 traite des groupe des messages, pas individuelement - ================ =================== =========================================================== + ================ =================== ============================================================= * pubTime, baseUrl, relPath, retrievePath, size, identity, sont tous des champs de message standard mieux décrit dans `sr_post(7) <../Reference/sr_post.7.html>`_ @@ -570,20 +571,41 @@ v2: appelez do_poll à partir du plugin. à faire dans chaque plugin. * paramètre poll_without_vip disponible. +Dans plusieurs sondages v2, un do_poll est associé à un point d'entrée download\_quelquechose +où un message partiel est construit avec le sondage et celui du téléchargement (ou do_download) est spécialisé +pour effectuer le téléchargement proprement dit. Souvent, dans SR3, on peut créer un message qui sera téléchargé avec succès +avec le traitement de téléchargement intégré. + +Un exemple de traitement de téléchargement personnalisé consiste à créer l'arborescence de répertoires dans laquelle télécharger, combinée avec +l'utilisation d'un en-tête *rename* (en v2 parent.msg.rename). On peut désormais utiliser "retrievePath" pour définir l'url +à soumettre au serveur, et "relPath" pour définir où il sera placé localement. *RelPath* inclut +toute l'arborescence des répertoires, où *rename* est uniquement pour le nom du fichier.
La combinaison de *relPath* et +*retrievePath* fournit souvent suffisamment de fonctionnalités pour éviter le besoin d'un point d'entrée de téléchargement. + +Il existe un autre modèle courant dans les sondages v2 où, plutôt que d'interroger un serveur distant pour savoir +quels nouveaux produits sont disponibles, dans sr3 nous avons le concept de flux programmé, où il y a une +liste fixe de requêtes effectuées périodiquement. Voir « Flux programmé » pour en savoir plus à ce sujet. Pour les sondages typiques, la migration +à sr3 suit : + v3: définir poll dans une classe flowcb. * le sondage n’est exécuté que lorsque has_vip est true. - * le point d’entrée registered_as() est discutable + * le point d’entrée registered_as() est inutile. * toujours rassembler les exécutions, et est utilisé pour s’abonner à post effectuée par le nœud qui a le vip, permettant a la cache nodupe d’être maintenu à jour. * API définie pour créer des messages de notification à partir de données de fichier, quel que soit le format du message de notification. + * get a disparu, les *poll* utilisent accept/reject comme les autres composants. + * renvoie une liste de messages de notification à filtrer et à publier. + * l'option *download* permet à un sondage (poll ou flow) de télécharger les données dans une seule configuration. + En v2, il faut combiner avec une autre configuration pour effectuer le téléchargement. + Pour créer un message de notification, sans fichier local, utilisez fromFileInfo sarracenia.message factory:: @@ -642,6 +664,31 @@ pour mettre à jour leur cache recent_files. exemples: * flowcb/poll/airnow.py + +Dans un sondage v2, les échanges de sortie étaient parfois des échanges assez populaires (par exemple xpublic) +ce qui rendrait les files d'attente duplicate_suppression dans un sondage sr3 plus grandes que nécessaire.
+ +Lorsque vous utilisez un sondage dans sr3, idéalement, le post_exchange est dédié à ce +sondage, afin que les participants VIP minimisent la taille de leur cache de suppression des doublons +en le chargeant uniquement des éléments publiés par le sondage. + + + +Flux programmé +~~~~~~~~~~~~~~ + +S'il existe un serveur WISKIS ( https://www.kisters.net/wiski ), il faut émettre +des requêtes, souvent centrées sur le temps, à intervalles réguliers. Donc un point d'entrée *gather()* +est implémenté qui renvoie une liste de messages qu'un téléchargeur utilisera pour obtenir les données. + +* https://github.com/MetPX/sarracenia/blob/development/sarracenia/examples/flow/opg.conf un exemple de configuration de flux pour interroger les capteurs d'Ontario Power Generation. + +* https://github.com/MetPX/sarracenia/blob/development/sarracenia/flowcb/scheduled/wiski.py Le plugin utilisé par la configuration OPG utilisant le point d'entrée gather(). + +Comme un sondage, on peut utiliser l'option *download* pour consommer les messages en les +téléchargeant dans la même configuration, ou publier sur un échange pour téléchargement +par un *subscribe* ou *sarra* pour plus de performance. + on_html_page -> sous-classement de flowcb/poll ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -861,6 +908,10 @@ créer une classe flowCallback avec un point d’entrée *download*. et le traitement normal du champ *retrievePath* dans le message de notification fera un bon téléchargement, aucun plugin est requis. +* Souvent, dans les sondages v2, plutôt que d'interroger un serveur distant pour savoir quels nouveaux produits + sont disponibles, dans sr3 nous avons le concept de flux planifié, où il y a une liste fixe de requêtes effectuées + périodiquement. Voir « Flux programmé » pour en savoir plus à ce sujet.
Pour les sondages typiques, la migration vers sr3 suit : + DESTFNSCRIPT ~~~~~~~~~~~~ diff --git a/docs/source/fr/Contribution/AMQPprimer.rst b/docs/source/fr/Contribution/AMQPprimer.rst index 38b14c384..4968b5a25 100644 --- a/docs/source/fr/Contribution/AMQPprimer.rst +++ b/docs/source/fr/Contribution/AMQPprimer.rst @@ -86,7 +86,7 @@ les travailleurs, mais pas exactement la fil d’attente partagée auto-équilib ~~~~~~~~~~~~~~~~~~~~ Les échanges thématiques sont utilisés exclusivement. AMQP prend en charge de nombreux autres -types d’échanges, mais la rubrique de sr_post est envoyée afin de prendre en charge le filtrage +types d’échanges, mais la rubrique de sr3_post est envoyée afin de prendre en charge le filtrage côté serveur à l’aide du filtrage par rubrique. À l’AMQP 1.0, les échanges thématiques (en fait, tous les échanges ne sont plus définis). Le filtrage côté serveur permet d’utiliser beaucoup moins de hiérarchies de rubriques et d’utiliser des sous-divisions beaucoup plus efficaces. diff --git a/docs/source/fr/Contribution/Design.rst b/docs/source/fr/Contribution/Design.rst index 63fcecaac..d53d3ec6d 100644 --- a/docs/source/fr/Contribution/Design.rst +++ b/docs/source/fr/Contribution/Design.rst @@ -130,14 +130,14 @@ gratuits, tels que RabbitMQ, souvent appelé 0,8, mais 0,9 et post 0,9. Les courtiers sont également susceptibles de bien interopérer. Dans AMQP, de nombreux acteurs différents peuvent définir des paramètres de communication. Pour créer un -modèle de sécurité, Sarracenia contraint ce modèle : les clients sr_post ne sont pas censés déclarer +modèle de sécurité, Sarracenia contraint ce modèle : les clients sr3_post ne sont pas censés déclarer des échanges. Tous les clients sont censés utiliser les échanges existants qui ont été déclarés par les administrateurs de courtiers. Les autorisations client sont limitées à la création de files d’attente pour leur propre usage, en utilisant des schémas de nommage convenus. 
File d’attente pour le client : qc_.???? Les échanges topic-based sont utilisés exclusivement. AMQP prend en charge de nombreux autres types d’échanges, -mais sr_post envoye la rubrique afin de prendre en charge le filtrage côté serveur à l’aide du topic +mais sr3_post envoye la rubrique afin de prendre en charge le filtrage côté serveur à l’aide du topic basé sur le filtrage. Les rubriques reflètent le chemin d’accès des fichiers annoncés, ce qui permet un filtrage direct côté serveur, complété par un filtrage côté client à la réception des messages. @@ -629,7 +629,7 @@ Le regroupement de courtiers est considéré comme une technologie mature et don DD : Configuration de la diffusion des données (AKA : Data Mart) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -La configuration de déploiement sr est davantage une configuration de point de terminaison. Chaque nœud est censé : +La configuration de déploiement sr3 est davantage une configuration de point de terminaison. Chaque nœud est censé : Avoir une copie complète de toutes les données téléchargées par tous les nœuds. Donner une vue unifiée rend ca beaucoup plus compatible avec une variété de méthodes d’accès, telles qu’un navigateur de fichiers (sur HTTP, ou sftp) plutôt que de se limiter aux messages de notification AMQP. C’est le type de vue présenté par diff --git "a/docs/source/fr/Contribution/D\303\251veloppement.rst" "b/docs/source/fr/Contribution/D\303\251veloppement.rst" index 191ba2ed6..606e571b8 100644 --- "a/docs/source/fr/Contribution/D\303\251veloppement.rst" +++ "b/docs/source/fr/Contribution/D\303\251veloppement.rst" @@ -635,7 +635,7 @@ Installez un courtier localhost minimal et configurez les utilisateurs de test r sudo wget http://localhost:15672/cli/rabbitmqadmin sudo chmod 755 rabbitmqadmin - sr --users declare + sr3 --users declare .. 
Note:: @@ -679,7 +679,7 @@ Besoin du package suivant pour cela:: sudo apt-get install python3-pyftpdlib python3-paramiko -Le script d’installation démarre un serveur Web trivial, un serveur ftp et un démon que sr_post appelle. +Le script d’installation démarre un serveur Web trivial, un serveur ftp et un démon que sr3_post appelle. Il teste également les composants C, qui doivent également avoir déjà été installés. et définit certains clients de test fixes qui seront utilisés lors des auto-tests :: @@ -764,7 +764,7 @@ et définit certains clients de test fixes qui seront utilisés lors des auto-te Starting flow_post on: /home/peter/sarra_devdocroot, saving pid in .flowpostpid Starting up all components (sr start)... done. - OK: sr start was successful + OK: sr3 start was successful Overall PASSED 4/4 checks passed! blacklab% @@ -813,7 +813,7 @@ Ensuite, vérifiez avec flow_check.sh:: test 4 success: max shovel (1022) and subscriber t_f30 (1022) should have about the same number of items test 5 success: count of truncated headers (1022) and subscribed messages (1022) should have about the same number of items test 6 success: count of downloads by subscribe t_f30 (1022) and messages received (1022) should be about the same - test 7 success: downloads by subscribe t_f30 (1022) and files posted by sr_watch (1022) should be about the same + test 7 success: downloads by subscribe t_f30 (1022) and files posted by sr3_watch (1022) should be about the same test 8 success: posted by watch(1022) and sent by sr_sender (1022) should be about the same test 9 success: 1022 of 1022: files sent with identical content to those downloaded by subscribe test 10 success: 1022 of 1022: poll test1_f62 and subscribe q_f71 run together. Should have equal results. @@ -834,7 +834,7 @@ approfondie, il est bon de savoir que les flux fonctionnent. Notez que l’abonné *fclean* examine les fichiers et les conserve suffisamment longtemps pour qu’ils puissent parcourir tous les autres tests. 
Il le fait en attendant un délai raisonnable (45 secondes, la dernière fois -vérifiée), puis il compare le fichier qui a été posté par sr_watch aux fichiers créés en téléchargeant à partir +vérifiée), puis il compare le fichier qui a été posté par sr3_watch aux fichiers créés en téléchargeant à partir de celui-ci. Au fur et à mesure que le dénombrement *sample now* progresse, il imprime "OK" si les fichiers téléchargés sont identiques à ceux postés par sr_watch. L’ajout de fclean et cfclean correspondant pour les cflow_test sont cassés. La configuration par défaut qui utilise *fclean* et *cfclean* garantit que seulement @@ -877,9 +877,9 @@ d’attente, les échanges et les journaux. Cela doit également être fait entr 2018-02-10 14:17:34,353 [INFO] info: report option not implemented, ignored. 2018-02-10 09:17:34,837 [INFO] sr_poll f62 cleanup 2018-02-10 09:17:34,845 [INFO] deleting exchange xs_tsource_poll (tsource@localhost) - 2018-02-10 09:17:35,115 [INFO] sr_post shim_f63 cleanup + 2018-02-10 09:17:35,115 [INFO] sr3_post shim_f63 cleanup 2018-02-10 09:17:35,122 [INFO] deleting exchange xs_tsource_shim (tsource@localhost) - 2018-02-10 09:17:35,394 [INFO] sr_post test2_f61 cleanup + 2018-02-10 09:17:35,394 [INFO] sr3_post test2_f61 cleanup 2018-02-10 09:17:35,402 [INFO] deleting exchange xs_tsource_post (tsource@localhost) 2018-02-10 09:17:35,659 [INFO] sr_report tsarra_f20 cleanup 2018-02-10 09:17:35,659 [INFO] AMQP broker(localhost) user(tfeed) vhost(/) @@ -941,7 +941,7 @@ d’attente, les échanges et les journaux. 
Cela doit également être fait entr 2018-02-10 09:17:39,927 [INFO] deleting queue q_tsource.sr_subscribe.u_sftp_f60.81353341.03950190 (tsource@localhost) 2018-02-10 09:17:40,196 [WARNING] option url deprecated please use post_base_url 2018-02-10 09:17:40,196 [WARNING] use post_broker to set broker - 2018-02-10 09:17:40,197 [INFO] sr_watch f40 cleanup + 2018-02-10 09:17:40,197 [INFO] sr3_watch f40 cleanup 2018-02-10 09:17:40,207 [INFO] deleting exchange xs_tsource (tsource@localhost) 2018-02-10 09:17:40,471 [INFO] sr_winnow t00_f10 cleanup 2018-02-10 09:17:40,471 [INFO] AMQP broker(localhost) user(tfeed) vhost(/) @@ -1043,7 +1043,7 @@ While it is runnig one can run flow_check.sh at any time:: test  4 success: max shovel (100008) and subscriber t_f30 (99953) should have about the same number of items test  5 success: count of truncated headers (100008) and subscribed messages (100008) should have about the same number of items test  6 success: count of downloads by subscribe t_f30 (99953) and messages received (100008) should be about the same - test  7 success: same downloads by subscribe t_f30 (199906) and files posted (add+remove) by sr_watch (199620) should be about the same + test  7 success: same downloads by subscribe t_f30 (199906) and files posted (add+remove) by sr3_watch (199620) should be about the same test  8 success: posted by watch(199620) and subscribed cp_f60 (99966) should be about half as many test  9 success: posted by watch(199620) and sent by sr_sender (199549) should be about the same test 10 success: 0 messages received that we don't know what happenned. 
@@ -1100,14 +1100,14 @@ mais continue de réessayer pour toujours avec un nombre variable d’éléments Pour récupérer de cet état sans rejeter les résultats d’un test long, procédez comme suit:: ^C to interrupt the flow_check.sh 100000 - blacklab% sr stop + blacklab% sr3 stop blacklab% cd ~/.cache/sarra blacklab% ls */*/*retry* shovel/pclean_f90/sr_shovel_pclean_f90_0001.retry shovel/pclean_f92/sr_shovel_pclean_f92_0001.retry subscribe/t_f30/sr_subscribe_t_f30_0002.retry.new shovel/pclean_f91/sr_shovel_pclean_f91_0001.retry shovel/pclean_f92/sr_shovel_pclean_f92_0001.retry.state shovel/pclean_f91/sr_shovel_pclean_f91_0001.retry.state subscribe/q_f71/sr_subscribe_q_f71_0004.retry.new blacklab% rm */*/*retry* - blacklab% sr start + blacklab% sr3 start blacklab% blacklab% ./flow_check.sh 100000 Sufficient! @@ -1137,9 +1137,9 @@ Pour récupérer de cet état sans rejeter les résultats d’un test long, proc test 4 success: sr_subscribe (98068) should have the same number of items as sarra (98075) | watch routing | - test 5 success: sr_watch (397354) should be 4 times subscribe t_f30 (98068) + test 5 success: sr3_watch (397354) should be 4 times subscribe t_f30 (98068) test 6 success: sr_sender (392737) should have about the same number - of items as sr_watch (397354) + of items as sr3_watch (397354) test 7 success: sr_subscribe u_sftp_f60 (361172) should have the same number of items as sr_sender (392737) test 8 success: sr_subscribe cp_f61 (361172) should have the same @@ -1150,11 +1150,11 @@ Pour récupérer de cet état sans rejeter les résultats d’un test long, proc test 10 success: sr_subscribe q_f71 (195406) should have about the same number of items as sr_poll test1_f62(195408) | flow_post routing | - test 11 success: sr_post test2_f61 (193541) should have half the same + test 11 success: sr3_post test2_f61 (193541) should have half the same number of items of sr_sender(196368) test 12 success: sr_subscribe ftp_f70 (193541) should have about the - same number of items as 
sr_post test2_f61(193541) - test 13 success: sr_post test2_f61 (193541) should have about the same + same number of items as sr3_post test2_f61(193541) + test 13 success: sr3_post test2_f61 (193541) should have about the same number of items as shim_f63 195055 | py infos routing | test 14 success: sr_shovel pclean_f90 (97019) should have the same diff --git a/docs/source/fr/Contribution/assemblage_on_part.rst b/docs/source/fr/Contribution/assemblage_on_part.rst index 07ff68916..47eb23a1e 100644 --- a/docs/source/fr/Contribution/assemblage_on_part.rst +++ b/docs/source/fr/Contribution/assemblage_on_part.rst @@ -5,9 +5,9 @@ Réassemblage de fichiers Composants ---------- -**sr_watch:** Vous pouvez utiliser sr_watch pour surveiller un répertoire pour les fichiers +**sr_watch:** Vous pouvez utiliser sr3_watch pour surveiller un répertoire pour les fichiers de partition entrants (.Part) de sr_subscribe ou sr_sender, les deux ont la possibilité d’envoyer -un fichier dans des partitions. Dans le fichier de configuration pour sr_watch les paramètres +un fichier dans des partitions. Dans le fichier de configuration pour sr3_watch les paramètres importants à inclure sont les suivants : - chemin @@ -46,7 +46,7 @@ Après avoir été déclenché par un fichier de pièce téléchargé : Test ---- -Créer un fichier de configuration sr_watch selon le modèle ci-dessus. +Créer un fichier de configuration sr3_watch selon le modèle ci-dessus. 
Démarrez le processus en tapant la commande suivante : ```sr_watch foreground path/to/config_file.cfg``` Ensuite, créez un fichier de configuration d’abonné et incluez ```inplace off``` afin que le fichier diff --git a/docs/source/fr/Contribution/v03.rst b/docs/source/fr/Contribution/v03.rst index cda4565e2..254f9dfd1 100644 --- a/docs/source/fr/Contribution/v03.rst +++ b/docs/source/fr/Contribution/v03.rst @@ -474,7 +474,7 @@ Avec le shovel et le winnow remplacés par de nouvelles implémentations, il pas le test de flux dynamique, y compris le module Retry porté sur sr3, et un certain nombre de modules v2 utilisés tels quels. -Terminé une version initiale du composant sr_post maintenant (dans sr3 : flowcb.gather.file.File) +Terminé une version initiale du composant sr3_post maintenant (dans sr3 : flowcb.gather.file.File) Maintenant, on travaille sur sr_poll, ce qui prendra un certain temps car il implique un refactoring: sr_file, sr_http, sr_ftp, sr_sftp dans le module de transfert. @@ -713,7 +713,7 @@ Cette section comprend un mélange de tâches qui, espérons-le, peuvent être a FIXME sont des choses laissées de côté qui doivent être vues. * **RELEASE BLOCKER** poilu. #403 - sr_watch ne fait pas de lot par lots. Il jette juste un arbre entier. + watch ne fait pas de lot par lots. Il jette juste un arbre entier. Cela devra être re-écrit avec une approche de style itérateur. Donc si vous commencez dans une arborescence avec un million de fichiers, il analysera le million entier et les présentera comme une liste de travail unique en mémoire. Cela aura des problèmes performances. @@ -724,7 +724,7 @@ FIXME sont des choses laissées de côté qui doivent être vues. l’impacte de la mémoire et le retard de production du premier fichier sont toujours là, mais au moins renvoie un lot à la fois. -* **RELEASE BLOCKER** journaux de sr_poll et sr_watch ont tendance à devenir énormes beaucoup trop rapidement. 
#389 +* **RELEASE BLOCKER** journaux de poll et watch ont tendance à devenir énormes beaucoup trop rapidement. #389 * essayez jsonfile pour créer des messages de notification à publier. peut construire json de manière incrémentielle, # 402 vous n’avez donc pas besoin de supprimer les éléments _deleteOnPost (vous pouvez simplement les ignorer) @@ -879,11 +879,11 @@ Fonctionnalités * les propriétés/options des classes sont désormais hiérarchiques, de sorte qu’elles peuvent définir le débogage sur des classes spécifiques dans l’application. -* sr ability pour sélectionner plusieurs composants et configurations sur lesquels on peut opérer. +* sr3 ability pour sélectionner plusieurs composants et configurations sur lesquels on peut opérer. -* sr list examples est maintenant utilisé pour afficher des exemples distincts de ceux installés. +* sr3 list examples est maintenant utilisé pour afficher des exemples distincts de ceux installés. -* sr show est maintenant utilisé pour afficher la configuration analysée. +* sr3 show est maintenant utilisé pour afficher la configuration analysée. * les messages de notification sont accusés de réception plus rapidement, ce qui devrait aider au débit. diff --git a/docs/source/fr/Explication/Concepts.rst b/docs/source/fr/Explication/Concepts.rst index 62170ae0e..5b7dfe6bb 100644 --- a/docs/source/fr/Explication/Concepts.rst +++ b/docs/source/fr/Explication/Concepts.rst @@ -27,13 +27,13 @@ Web du même serveur, mais cela n'est pas nécessaire. Le courtier pourrait La meilleure façon d'effectuer des transferts de données est d'éviter les sondages (examination récurrente de répertoires afin de détecter des changements de fichiers.) C'est plus efficace si les rédacteurs peuvent -être amenés à émettre des messages d'annonce en format sr_post appropriés. De même, +être amenés à émettre des messages d'annonce en format sr3_post appropriés. 
De même, lors de la livraison, il est idéal si les destinataires utilisent sr_subscribe, et un plugin on_file pour déclencher leur traitement ultérieur, de sorte que le fichier est qui leur a été remis sans sondage. C'est la façon la plus efficace de travailler, mais... il est entendu que pas tous les logiciels ne seront coopératifs. Pour démarrer le flot en Sarracenia dans ces cas, -ca prend des outils de sondage: sr_poll (à distance), et sr_watch (locale.) +ca prend des outils de sondage: sr_poll (à distance), et sr3_watch (locale.) D'une manière générale, Linux est la principale cible de déploiement et la seule plate-forme sur laquelle les configurations de serveur sont déployées. diff --git a/docs/source/fr/Explication/ConsiderationsDeployments.rst b/docs/source/fr/Explication/ConsiderationsDeployments.rst index 08d876591..359e93287 100644 --- a/docs/source/fr/Explication/ConsiderationsDeployments.rst +++ b/docs/source/fr/Explication/ConsiderationsDeployments.rst @@ -295,7 +295,7 @@ Abonnés Afficher, Avis, Notification, publication, Ce sont des messages AMQP construits par sr_post, sr_poll, sr_poll, ou - sr_watch pour laisser les utilisateurs savoir qu'un fichier particulier est + sr3_watch pour laisser les utilisateurs savoir qu'un fichier particulier est prêt. Le format de ces messages AMQP est le suivant décrit par la page manuel `sr_post(7) <../Reference/sr3.1.html#post>`_ . Tous ces les mots sont utilisés de façon interchangeable. 
Les avis à chaque étape préservent l´origine d'origine diff --git a/docs/source/fr/Explication/GuideLigneDeCommande.rst b/docs/source/fr/Explication/GuideLigneDeCommande.rst index 683a35e60..a05d22dc7 100644 --- a/docs/source/fr/Explication/GuideLigneDeCommande.rst +++ b/docs/source/fr/Explication/GuideLigneDeCommande.rst @@ -1526,7 +1526,7 @@ Un autre exemple avec un fichier:: sr3 --post_baseDir /data/web/public_data --post_baseUrl http://dd.weather.gc.ca/ --path bulletins/alphanumeric/SACN32_CWAO_123456 -post_broker amqp://broker.com start watch/myflow -Par défaut, sr_watch vérifie le fichier /data/web/public_data/bulletins/alphanumériques/SACN32_CWAO_123456 +Par défaut, sr3_watch vérifie le fichier /data/web/public_data/bulletins/alphanumériques/SACN32_CWAO_123456 (concaténer le répertoire base_dir et le chemin relatif de l'url source pour obtenir le chemin du fichier local). Si le fichier change, il calcule sa somme de contrôle. Il construit ensuite un message, se connecte à broker.com en tant qu'utilisateur'guest'. (informations d'identification par défaut) et envoie le message aux valeurs par défaut vhost'/' et exchange'sx_guest' (échange par défaut) @@ -1538,9 +1538,9 @@ Un exemple de vérification d'un répertoire:: sr3 -post_baseDir /data/web/public_data -post_baseUrl http://dd.weather.gc.ca/ --path bulletins/alphanumeric --post_broker amqp://broker.com start watch/myflow -Ici, sr_watch vérifie la création de fichiers (modification) dans /data/web/public_data/bulletins/alphanumérique. +Ici, sr3_watch vérifie la création de fichiers (modification) dans /data/web/public_data/bulletins/alphanumérique. (concaténer le répertoire base_dir et le chemin relatif de l'url source pour obtenir le chemin du répertoire). -Si le fichier SACN32_CWAO_123456 est créé dans ce répertoire, sr_watch calcule sa somme de contrôle. +Si le fichier SACN32_CWAO_123456 est créé dans ce répertoire, sr3_watch calcule sa somme de contrôle. 
Il construit ensuite un message, se connecte à broker.com en tant qu'utilisateur'guest'. (informations d'identification par défaut) et envoie le message à exchange'amq.topic' (échange par défaut) diff --git a/docs/source/fr/Explication/Histoire/HPC_mirroring_cas_dutilisation.rst b/docs/source/fr/Explication/Histoire/HPC_mirroring_cas_dutilisation.rst index 6e7193f24..ec83f3553 100644 --- a/docs/source/fr/Explication/Histoire/HPC_mirroring_cas_dutilisation.rst +++ b/docs/source/fr/Explication/Histoire/HPC_mirroring_cas_dutilisation.rst @@ -162,12 +162,12 @@ possible (environ cinq à dix minutes par requête) combiné avec sr_poll pour a modifiés (et donc éligibles à la copie). C’est complètement non portable, mais on s’attendait à ce qu’il soit beaucoup plus rapide que la traversée de l’arborescence des fichiers. -Au cours de l’hiver 2016-2017, ces deux méthodes ont été mises en œuvre. Le sr_watch basé sur +Au cours de l’hiver 2016-2017, ces deux méthodes ont été mises en œuvre. Le sr3_watch basé sur INOTIFY était la méthode la plus rapide (instantanée), mais les démons avaient des problèmes de stabilité et de consommation de mémoire, et ils ont également pris trop de temps à démarrer (nécessite une traversée initiale de l’arbre, qui prend le même temps que rsync). Bien que plus lent (prenant plus de temps pour remarquer qu’un fichier a été modifié), la politique GPFS avait -une performance *acceptable* et était beaucoup plus fiable que la méthode de sr_watch parallèle, +une performance *acceptable* et était beaucoup plus fiable que la méthode de sr3_watch parallèle, et au printemps, avec un déploiement prévu pour le début de juillet 2017, l’approche stratégique du GPFS a été choisie. 
diff --git a/docs/source/fr/Explication/Histoire/deploiement_2018.rst b/docs/source/fr/Explication/Histoire/deploiement_2018.rst index ce090fa26..68707e45b 100644 --- a/docs/source/fr/Explication/Histoire/deploiement_2018.rst +++ b/docs/source/fr/Explication/Histoire/deploiement_2018.rst @@ -213,7 +213,7 @@ Exploration des cas d'utilisation élargie : Il était nécessaire d'implémenter une version C du code d'affichage Sarracenia pour l'utilisation par la bibliothèque shim. Une fois l'implémentation C commencée, ce n'était qu'un petit travail supplémentaire pour implémenter - une version C de sr_watch (appelé sr_cpost) qui était beaucoup plus efficace + une version C de sr3_watch (appelé sr_cpost) qui était beaucoup plus efficace en mémoire et en CPU que l'original python. * Implantation de client Node.js : Un client du datamart public a décidé d'implémenter diff --git a/docs/source/fr/Explication/Histoire/mesh_gts.rst b/docs/source/fr/Explication/Histoire/mesh_gts.rst index 8eb85beda..a38d045de 100644 --- a/docs/source/fr/Explication/Histoire/mesh_gts.rst +++ b/docs/source/fr/Explication/Histoire/mesh_gts.rst @@ -51,7 +51,7 @@ et l’accès pour d’autres acteurs de la société au sens large devraient ê Le protocole sr_post, et un certain nombre d’implémentations existantes de celui-ci, conviennent parfaitement. -Bien que l’on pense que le protocole sr_post a un grand potentiel pour améliorer l’échange +Bien que l’on pense que le protocole sr3_post a un grand potentiel pour améliorer l’échange de données de WMO, il faudra quelques années pour l’adopter, et avant son adoption, il faut s’entendre sur le contenu de l’arborescence des fichiers. Aujourd’hui, la prochaine étape consisterait à trouver des pays partenaires avec lesquels s’engager dans des échanges de @@ -751,7 +751,7 @@ proposées. 
Programmabilité/interopérabilité -------------------------------- -Une nouvelle application pour traiter sr_post messages peut être réimplémentée s’il y a un désir +Une nouvelle application pour traiter sr3_post messages peut être réimplémentée s’il y a un désir de le faire, car en plus de la documentation complète, le code source d’une poignée `d’implémentations <../Aperçu.html#implémentations>`_ (Python, C, Go, node.js), est facilement accessible au public. L’implémentation python dispose d’une interface de plug-in étendue disponible pour personnaliser le traitement de différentes diff --git a/docs/source/fr/Explication/SarraPluginDev.rst b/docs/source/fr/Explication/SarraPluginDev.rst index 7a6c705b6..d0e1056e5 100644 --- a/docs/source/fr/Explication/SarraPluginDev.rst +++ b/docs/source/fr/Explication/SarraPluginDev.rst @@ -1001,6 +1001,6 @@ Pour l’instant, il est préférable de consulter le `Tutoriels <../Tutoriels>` qui ont quelques exemples d’une telle utilisation. .. Note:: - **FIXME**, lien vers amqplib ou liaisons java, et pointeur vers les pages de manuel sr_post et sr_report section 7. + **FIXME**, lien vers amqplib ou liaisons java, et pointeur vers les pages de manuel sr3_post et sr_report section 7. diff --git a/docs/source/fr/Explication/StrategieDetectionFichiers.rst b/docs/source/fr/Explication/StrategieDetectionFichiers.rst index 84d148c26..a17929ff8 100644 --- a/docs/source/fr/Explication/StrategieDetectionFichiers.rst +++ b/docs/source/fr/Explication/StrategieDetectionFichiers.rst @@ -4,7 +4,7 @@ File Detection Strategies ========================= -Le travail fondamental de sr_watch est de remarquer quand les fichiers sont +Le travail fondamental de watch est de remarquer quand les fichiers sont disponibles pour être transférés. 
La stratégie appropriée varie en fonction de: - le **nombre de fichiers de l'arbre** à surveiller, @@ -17,7 +17,7 @@ affiche un message pour un composant *sr_sarra*, alors l'utilisation de l'option le nombre minimale de fichiers dans le répertoire et minimisera le temps de remarquer les nouveaux. Dans ces conditions optimales, l'observation des fichiers dans un centième de seconde, c'est raisonnable de s'y attendre. N'importe quelle méthode fonctionnera bien pour de tels arbres, mais... les charge imposé -sur l´ordinateur par la méthode par défaut de sr_watch (inotify) sont généralement les plus basses. +sur l´ordinateur par la méthode par défaut de watch (inotify) sont généralement les plus basses. Lorsque l’arborescence devient grande, la décision peut changer en fonction d’un certain nombre de facteurs, décrit dans le tableau suivant. Il décrit les approches qui seront les plus basses en @@ -65,9 +65,9 @@ Tableau de stratégie de détection de fichiers | | | - ou la vitesse est important. | |(en C) | - plus vite que sr_watch. | - ou on n´a pas besoin de plugins. | | | - utilise moins de mémoire vive que | - limité sues with tree size | -| | sr_watch | as sr_watch, just a little later. | +| | watch | as watch, just a little later. | | | - peut marcher avec des arbres | (see following methods) | -| | plus grand que sr_watch | | +| | plus grand que watch | | +-------------+---------------------------------------+--------------------------------------+ | |Fichier transférés avec *.tmp* suffixe.|Réception de livraisons d´autres | |sr_watch avec|lorsque complete, renommé pour enlevé |systèmes ( .tmp étant standard) | @@ -81,7 +81,7 @@ Tableau de stratégie de détection de fichiers | | au démarrage |fichiers avec seulement quelques | |(en Python) | - chaque noeud dans un grappe a besoin|secondes de delai au démarrage. | | | de tourner un instance. | | -| | - chaque sr_watch est une seul tâche. 
|trop lent pour des arbres de millions | +| | - chaque watch est une seul tâche. |trop lent pour des arbres de millions | | | |fichiers. | +-------------+---------------------------------------+--------------------------------------+ |sr_watch avec|utilisez conventsion linux pour cacher | | @@ -113,7 +113,7 @@ Tableau de stratégie de détection de fichiers | | |en parallèle. | +-------------+---------------------------------------+--------------------------------------+ -sr_watch est sr_post avec l'option *sleep* qui lui permettra de boucler les répertoires donnés en arguments. +sr_watch est sr3_post avec l'option *sleep* qui lui permettra de boucler les répertoires donnés en arguments. sr_cpost est une version C qui fonctionne de manière identique, sauf qu'elle est plus rapide et utilise beaucoup moins de mémoire, à l'adresse le coût de la perte du support des plugins. Avec sr_watch (et sr_cpost) La méthode par défaut de la remarque les changements dans les répertoires @@ -139,7 +139,7 @@ défaut et ne devrait pas être utilisé si la rapidité d'exécution est une pr Dans les clusters de supercalculateurs, des systèmes de fichiers distribués sont utilisés, et les méthodes optimisées pour le système d'exploitation les modifications de fichiers (INOTIFY sous Linux) -ne franchissent pas les limites des nœuds. Pour utiliser sr_watch avec la stratégie par défaut +ne franchissent pas les limites des nœuds. Pour utiliser watch avec la stratégie par défaut sur un répertoire dans un cluster de calcul, on doit généralement avoir un processus sr_watch sr_watch s'exécutant sur chaque noeud. Si cela n'est pas souhaitable, alors on peut le déployer sur un seul nœud avec *force_polling* mais le timing sera le suivant être limité par la taille du répertoire. @@ -198,7 +198,7 @@ d'écrire des fichiers dans des répertoires surveillés par sr_watch. 
Le fait d correctement les protocoles de complétion de fichiers est une source commune de problèmes intermittents et difficiles à diagnostiquer en matière de transfert de fichiers. Pour des transferts de fichiers fiables, Il est essentiel que les processus qui écrivent -des fichiers et sr_watch s'entendent sur la façon de représenter un fichier qui n'est pas complet. +des fichiers et watch s'entendent sur la façon de représenter un fichier qui n'est pas complet. diff --git a/docs/source/fr/Reference/index.rst b/docs/source/fr/Reference/index.rst index a34b9fc4b..ce29e5103 100644 --- a/docs/source/fr/Reference/index.rst +++ b/docs/source/fr/Reference/index.rst @@ -10,5 +10,5 @@ Référence sr3_post CLI poster sr3_credentials.7 sr3_options.7 - format de message d´annonce sr_post + format de message d´annonce sr3_post glossary diff --git a/docs/source/fr/Reference/sr3_post.1.rst b/docs/source/fr/Reference/sr3_post.1.rst index e464c94ae..7249637dc 100644 --- a/docs/source/fr/Reference/sr3_post.1.rst +++ b/docs/source/fr/Reference/sr3_post.1.rst @@ -61,7 +61,7 @@ Format de l'argument pour les option *path* :: ou relative_path_to_the/filename -L'option *-pipe* peut être spécifiée pour que sr_post lise les noms de chemin des fichiers également à partir +L'option *-pipe* peut être spécifiée pour que sr3_post lise les noms de chemin des fichiers également à partir de l'entrée standard. Un exemple d'invocation de *sr3_post* :: @@ -69,7 +69,7 @@ Un exemple d'invocation de *sr3_post* :: sr3_post --post_broker amqp://broker.com --post_baseUrl sftp://stanley@mysftpserver.com/ --path /data/shared/products/foo -Par défaut, sr_post lit le fichier /data/shared/products/foo et calcule sa somme de contrôle (checksum). +Par défaut, sr3_post lit le fichier /data/shared/products/foo et calcule sa somme de contrôle (checksum). 
Il crée ensuite un message de publication, se connecte à broker.com en tant qu'utilisateur "invité" (informations d'identification par défaut) et envoie l'article au vhost par défaut '/' et à l'échange par défaut. L'échange par défaut est le préfixe *xs_* suivi du nom d'utilisateur du courtier, donc par défaut 'xs_guest'. @@ -100,7 +100,7 @@ Un autre exemple:: sr3_post --post_broker mqtt://broker.com --post_baseDir /data/web/public_data --postBaseUrl http://dd.weather.gc.ca/ --path bulletins/alphanumeric/SACN32_CWAO_123456 -Par défaut, sr_post lit le fichier /data/web/public_data/bulletins/alphanumeric/SACN32_CWAO_123456 +Par défaut, sr3_post lit le fichier /data/web/public_data/bulletins/alphanumeric/SACN32_CWAO_123456 (en concaténant le post_base_dir et le chemin relatif de l'url source pour obtenir le chemin du fichier local) et calcule sa somme de contrôle. Il crée ensuite un message d'article, se connecte à broker.com en tant qu'utilisateur "invité" (informations d'identification par défaut) et envoie l'article au vhost par défaut '/' et échange 'xs_guest', résultant @@ -324,7 +324,7 @@ header = UTILISATION DE LA LIBRAIRIE SHIM ================================ -Plutôt qu'invoquer un sr_post pour poster chaque fichier à publier, on peut avoir des processus automatiquement +Plutôt qu'invoquer un sr3_post pour poster chaque fichier à publier, on peut avoir des processus automatiquement publiez les fichiers qu'ils écrivent en leur faisant utiliser une bibliothèque de shim interceptant certains appels d'i/o de fichiers vers la libc et le noyau. Pour activer la bibliothèque shim, dans l'environnement shell, ajoutez :: diff --git a/docs/source/fr/Reference/sr_post.7.rst b/docs/source/fr/Reference/sr_post.7.rst index 9bbe52ffc..a362ab045 100644 --- a/docs/source/fr/Reference/sr_post.7.rst +++ b/docs/source/fr/Reference/sr_post.7.rst @@ -41,7 +41,7 @@ SYNOPSIS Version 03 du format des annonces de modification d'un fichier pour sr_post. 
-Un message sr_post se compose d’un sujet et du *BODY* +Un message sr3_post se compose d’un sujet et du *BODY* **AMQP Topic:** ~~~~~~~~~~~~~~~ @@ -577,9 +577,9 @@ EXEMPLE Un Autre Exemple ---------------- -Le post résultant de la commande de sr_watch suivante, a noter la création du fichier 'foo':: +Le post résultant de la commande de sr3_watch suivante, a noter la création du fichier 'foo':: - sr_watch -pbu sftp://stanley@mysftpserver.com/ -path /data/shared/products/foo -pb amqp://broker.com + sr3_watch -pbu sftp://stanley@mysftpserver.com/ -path /data/shared/products/foo -pb amqp://broker.com Ici, *sr_watch* vérifie si le fichier /data/shared/products/foo est modifié. Lorsque cela se produit, *sr_watch* lit le fichier /data/shared/products/foo et calcule sa somme de contrôle. diff --git a/docs/source/fr/Tutoriel/Mettre_en_place_un_subscriber_distant.rst b/docs/source/fr/Tutoriel/Mettre_en_place_un_subscriber_distant.rst index cef9d36e7..1f8f28d3e 100644 --- a/docs/source/fr/Tutoriel/Mettre_en_place_un_subscriber_distant.rst +++ b/docs/source/fr/Tutoriel/Mettre_en_place_un_subscriber_distant.rst @@ -59,7 +59,7 @@ Maintenant, démarrez l’abonné nouvellement créé:: $ sr3 start swob 2015-12-03 06:53:35,268 [INFO] user_config = 0 ../swob.conf 2015-12-03 06:53:35,269 [INFO] instances 1 - 2015-12-03 06:53:35,270 [INFO] sr subscribe swob 0001 started + 2015-12-03 06:53:35,270 [INFO] sr3 subscribe swob 0001 started L’activité peut être surveillée via des fichiers journaux dans ``~/.cache/sarra/log/`` ou avec la commande *log* :: diff --git a/docs/source/fr/Tutoriel/Mettre_en_place_un_subscriber_local.rst b/docs/source/fr/Tutoriel/Mettre_en_place_un_subscriber_local.rst index c5c084a07..31cc185b1 100644 --- a/docs/source/fr/Tutoriel/Mettre_en_place_un_subscriber_local.rst +++ b/docs/source/fr/Tutoriel/Mettre_en_place_un_subscriber_local.rst @@ -56,8 +56,8 @@ Configurez les bits qui publient les modifications apportées à l’échange :: events modify,create $ mkdir -p 
/tmp/sarra/{in,out}put - $ sr start - $ sr_watch log test-watch + $ sr3 start + $ sr3_watch log test-watch --> Tous les rapports normaux.:: @@ -71,7 +71,7 @@ Configurez les bits qui publient les modifications apportées à l’échange :: no=1 --> Notez la ligne avec **[ERROR]**, elle n’a pas pu trouver la fil d’attente. -c’est parce que la fil d’attente doit d’abord être créée par sr_watch et puis que que nous avons commencé l'abonné et +c’est parce que la fil d’attente doit d’abord être créée par sr3_watch et puis que que nous avons commencé l'abonné et watch en même temps avec '``sr start``' nous sommes tombés dans une petite condition de course. Cela a été résolu peu de temps après car le sr_subscribe a un temps de nouvelle tentative de 1 seconde. Cela peut être confirmé avec la page 'RabbitMQ Queues' affichant une ``q_bob.sr_subscribe.test_subscribe. ...`` fil d’attente dans la liste.:: @@ -87,7 +87,7 @@ Cela peut être confirmé avec la page 'RabbitMQ Queues' affichant une ``q_bob.s . 2020-08-20 16:29:26,078 [INFO] file_log downloaded to: /tmp/sarra/output/testfile1.txt - $ sr_watch log test-watch + $ sr3_watch log test-watch 2020-08-20 16:29:20,612 [INFO] post_log notice=20200820212920.611807823 file:/ /tmp/sarra/input/testfile1.txt headers={'to_clusters':'localhost', 'mtime':'20200820212920.0259232521', 'atime': '20200820212920.0259232521', 'mode': '644', 'parts': '1,0,1,0,0', 'sum':'d,d41d8cd98f00b204e9800998ecf8427e'} $ touch /tmp/sarra/input/testfile{2..9}.txt @@ -98,7 +98,7 @@ Maintenant, vous pouvez regarder les fichiers ruisseler dans le dossier de sorti Regardez également la page 'RabbitMQ Queues' qui recoit et traite les messages AMQP. Lorsque tout est terminé, vous pouvez arrêter à la fois l’abonné et le watcher avec:: - $ sr stop + $ sr3 stop ... $ sr_subscribe cleanup test-subscribe ... 
diff --git a/docs/source/fr/Tutoriel/hello_world_server.txt b/docs/source/fr/Tutoriel/hello_world_server.txt index 817d10b79..f3bfbaa4c 100644 --- a/docs/source/fr/Tutoriel/hello_world_server.txt +++ b/docs/source/fr/Tutoriel/hello_world_server.txt @@ -25,7 +25,7 @@ cat >rawall.conf < 99999: + self.seq = 1 + return str(self.seq).zfill(5) + + + def getStation(self, data): + """Extracted from Sundew code: https://github.com/MetPX/Sundew/blob/main/lib/bulletin.py#L327-L408 + Get the station ID from the bulletin contents. + Examples: + CACN00 CWAO -> Station ID located on second line. + FTCN32 CWAO -> Station ID located on first line (with header) + """ + + station = '' + + # There is no station in a binary bulletin + if self.binary: + return station + + data = data.lstrip('\n') + data = data.split('\n') + + try: + premiereLignePleine = "" + deuxiemeLignePleine = "" + + # special case, need to get the next full line. + i = 0 + for ligne in data[1:]: + i += 1 + premiereLignePleine = ligne + if len(premiereLignePleine) > 1: + if len(data) > i+1 : deuxiemeLignePleine = data[i+1] + break + + #print " ********************* header = ", data[0][0:7] + # switch depends on bulletin type. 
+ if data[0][0:2] == "SA": + if data[1].split()[0] in ["METAR","LWIS"]: + station = premiereLignePleine.split()[1] + else: + station = premiereLignePleine.split()[0] + + elif data[0][0:2] == "SP": + station = premiereLignePleine.split()[1] + + elif data[0][0:2] in ["SI","SM"]: + station = premiereLignePleine.split()[0] + if station == "AAXX" : + if deuxiemeLignePleine != "" : + station = deuxiemeLignePleine.split()[0] + else : + station = '' + + elif data[0][0:6] in ["SRCN40","SXCN40","SRMT60","SXAK50", "SRND20", "SRND30"]: + #elif data[0][0:6] in self.wmo_id: + station = premiereLignePleine.split()[0] + + elif data[0][0:2] in ["FC","FT"]: + if premiereLignePleine.split()[1] == "AMD": + station = premiereLignePleine.split()[2] + else: + station = premiereLignePleine.split()[1] + + elif data[0][0:2] in ["UE","UG","UK","UL","UQ","US"]: + parts = premiereLignePleine.split() + if parts[0][:2] in ['EE', 'II', 'QQ', 'UU']: + station = parts[1] + elif parts[0][:2] in ['PP', 'TT']: + station = parts[2] + else: + station = '' + + elif data[0][0:2] in ["RA","MA","CA"]: + station = premiereLignePleine.split()[0].split('/')[0] + + except Exception: + station = '' + + if station != '' : + while len(station) > 1 and station[0] == '?' : + station = station[1:] + if station[0] != '?' : + station = station.split('?')[0] + if station[-1] == '=' : station = station[:-1] + else : + station = '' + + # Added to SR3 + # The station needs to be alphanumeric, between 3 and 5 characters. If not, don't assign a station + if re.search('^[a-zA-Z0-9]{3,5}$', station) == None: + station = '' + + return station + + + def getBBB(self, first_line): + """Get the BBB. If none found, return empty string. + The BBB is the field of the bulletin header that states if it was amended or not. 
+ """ + + if len(first_line) != 4: + BBB = '' + else: + BBB = first_line[3] + + return BBB + + def buildHeader(self, first_line): + """ Build header from file contents + """ + + try: + T1T2A1A2ii = first_line[0] + CCCC = first_line[1] + + if len(first_line) >= 3: + YYGGgg = first_line[2] + header = T1T2A1A2ii + "_" + CCCC + "_" + YYGGgg + else: + header = T1T2A1A2ii + "_" + CCCC # + "_" + YYGGgg + + except Exception: + header = None + + return header + + + def getTime(self, data): + """ extract time from the data of the ca station + the data's first line looks like this : x,yyyy,jul,hhmm,... + where x is an integer of no importance, followed by obs'time + yyyy = year + jul = julian day + hhmm = hour and mins + """ + + try: + parts = data.split(',') + + if len(parts) < 4: return None + + year = parts[1] + jul = parts[2] + hhmm = parts[3] + + # passe-passe pour le jour julien en float parfois ? + f = float(jul) + i = int(f) + jul = '%s' % i + # fin de la passe-passe + + # strange 0 filler + + while len(hhmm) < 4: + hhmm = '0' + hhmm + while len(jul) < 3: + jul = '0' + jul + + # problematic 2400 for 00z + + if hhmm != '2400': + emissionStr = year + jul + hhmm + timeStruct = time.strptime(emissionStr, '%Y%j%H%M') + ddHHMM = time.strftime("%d%H%M", timeStruct) + return ddHHMM + + # sometime hhmm is 2400, to avoid exception + # set time to 00, increase by 24 hr + + jul00 = year + jul + '0000' + timeStruct = time.strptime(jul00, '%Y%j%H%M') + ep_emission = time.mktime(timeStruct) + 24 * 60 * 60 + timeStruct = time.localtime(ep_emission) + ddHHMM = time.strftime('%d%H%M', timeStruct) + return ddHHMM + except Exception as e: + return None diff --git a/sarracenia/flow/__init__.py b/sarracenia/flow/__init__.py index 233301d6c..3b55fc76a 100644 --- a/sarracenia/flow/__init__.py +++ b/sarracenia/flow/__init__.py @@ -554,6 +554,11 @@ def run(self): # restore adjustment to fileOp if 'post_fileOp' in m: m['fileOp'] = m['post_fileOp'] + + if self.o.download and 'retrievePath' in m: 
+ # retrieve paths do not propagate after download. + del m['retrievePath'] + self._runCallbacksWorklist('after_work') @@ -805,10 +810,18 @@ def updateFieldsAccepted(self, msg, urlstr, pattern, maskDir, u = sarracenia.baseUrlParse(msg['baseUrl']) relPath = u.path[1:] + '/' + relPath - # FIXME... why the % ? why not just assign it to copy the value? if self.o.download and 'rename' in msg: + # FIXME... why the % ? why not just assign it to copy the value? relPath = '%s' % msg['rename'] + # after download we dont propagate renaming... once used, get rid of it + del msg['rename'] + # FIXME: worry about publishing after a rename. + # the relpath should be replaced by rename value for downstream + # because the file was written to rename. + # not sure if this happens or not. + + token = relPath.split('/') filename = token[-1] diff --git a/sarracenia/flowcb/gather/am.py b/sarracenia/flowcb/gather/am.py index 1cc84b0c8..a57cf7320 100644 --- a/sarracenia/flowcb/gather/am.py +++ b/sarracenia/flowcb/gather/am.py @@ -29,12 +29,21 @@ Default value is utf-8 MissingAMHeaders (string): - Specify headers to be added inside of the file contents. + Specify headers to be added inside of the file contents. Applies only for CA,MA,RA first chars of bulletin. + Default is CN00 CWAO binaryInitialCharacters (list): Binary bulletins are characterised by having certain sets of characters on its second line. This option allows to customise which binary strings to look for to determine if a bulletin is binary or not. + mapStations2AHL (list): + Some bulletins need to get their header constructed based on a bulletin station mapping file. In sr3, this file would normally be included as stations.inc. + The format of a station mapping is the following, and is in relation to what was found on Sundew + + mapStations2AHL T1T2A1A2ii CCCC station1 station2 station3 ... + i.e. + mapStations2AHL USCN21 CTST 71126 71156 71396 ... 
+ directory (string): Specifies the directory where the bulletin files are to be stored. @@ -56,6 +65,8 @@ from base64 import b64encode import urllib.parse import sarracenia +from sarracenia.bulletin import Bulletin +from sarracenia.flowcb.rename.raw2bulletin import Raw2bulletin import sarracenia.config from sarracenia.flowcb import FlowCB from random import randint @@ -67,6 +78,8 @@ class Am(FlowCB): def __init__(self, options): super().__init__(options,logger) + self.bulletinHandler = Bulletin() + self.renamer = Raw2bulletin(self.o) self.url = urllib.parse.urlparse(self.o.sendTo) @@ -78,6 +91,7 @@ def __init__(self, options): self.o.add_option('AllowIPs', 'list', []) self.o.add_option('inputCharset', 'str', 'utf-8') self.o.add_option('MissingAMHeaders', 'str', 'CN00 CWAO') + self.o.add_option('mapStations2AHL', 'list', []) self.o.add_option('binaryInitialCharacters', 'list', [b'BUFR' , b'GRIB', b'\211PNG']) self.host = self.url.netloc.split(':')[0] @@ -265,7 +279,94 @@ def unwrapmsg(self): return '', 0 - def gather(self): + def correctContents(self, bulletin, bulletin_firstchars, lines, missing_ahl, bulletin_station, charset): + """ Correct the bulletin contents, either of these ways + 1. Remove trailing space in bulletin header + 1. Add missing AHL headers for CA,MA,RA bulletins + 2. Add missing AHL headers by mapping station codes + 3. Add an extra line for SM/SI bulletins + """ + + # We need to get the BBB from the header, to properly rewrite it. + # FIXME: Does this only apply for the station mapping? (Not sure - ANL, 2024/02/19) + + reconstruct = 0 + ddhhmm = '' + new_bulletin = b'' + + # If there's a trailing space at the end of the bulletin header. Remove it. + if lines[0][-1:] == b' ': + lines[0] = lines[0].rstrip() + reconstruct = 1 + + # Ported from Sundew. Complete missing headers from bulletins starting with the first characters below. 
+ if bulletin_firstchars in [ "CA", "RA", "MA" ]: + + logger.debug("Adding missing headers in file contents for CA,RA or MA bulletin") + + # We also need to get the timestamp to complete the CA,RA,MA headers + ddhhmm = self.bulletinHandler.getTime(bulletin.decode(charset)) + # If None is returned, the bulletin is invalid + if ddhhmm != None: + missing_ahl += " " + ddhhmm + + lines[0] += missing_ahl.encode(charset) + reconstruct = 1 + + # FIXME: Is this too expensive in time? + if self.o.mapStations2AHL: + for map in self.o.mapStations2AHL: + + map_elements = map.split(' ') + # First two elements of the list are the missing AHL headers that we would want to add. + ahl_from_station = map_elements[:2] + + # Check if the bulletin station is included in the mapStations2AHL options + # Also we need the first characters of the bulletin to match the ones from the mapping header. + if bulletin_station in map_elements[2:] and bulletin_firstchars == map_elements[0][:2]: + + # We want to append the new AHL without removing the timestamp nor the BBB. + bulletin_ahl = lines[0].split(b' ') + bulletin_ahl[0] = ahl_from_station[0] + ' ' + ahl_from_station[1] + + logger.debug("Adding missing headers in file contents for station mappings") + + # These bulletins should already have two elements of the header. Maybe three if the BBB is there. + if len(bulletin_ahl) == 2: + lines[0] = bulletin_ahl[0].encode(charset) + b" " + bulletin_ahl[1] + elif len(bulletin_ahl) == 3: + lines[0] = bulletin_ahl[0].encode(charset) + b" " + bulletin_ahl[1] + b" " + bulletin_ahl[2] + else: + logger.error("Not able to add new station AHLs.") + + # We found the station. We can leave the loop now. 
+ reconstruct = 1 + break + + # From Sundew -> https://github.com/MetPX/Sundew/blob/main/lib/bulletinAm.py#L114-L115 + # AddSMHeader is set to True on all operational Sundew configs so no need to add an option + if bulletin_firstchars in ["SM", "SI"]: + + logger.debug("Adding missing line in SI/SM bulletin") + + ddhh = lines[0].split(b' ')[2][0:4] + line2add = b"AAXX " + ddhh + b"4" + lines.insert(1, line2add) + + reconstruct = 1 + + + if reconstruct == 1: + # Reconstruct the bulletin + for i in lines: + new_bulletin += i + b'\n' + + logger.debug("Missing contents added") + + return new_bulletin + + + def gather(self, messageCountMax): self.AddBuffer() @@ -291,7 +392,7 @@ def gather(self): # We only want the first two letters of the bulletin. bulletinHeader = parse[0].decode(charset).replace(' ', '_') firstchars = bulletinHeader[0:2] - + # Treat bulletin contents and compose file name try: ## NOTE: Bulletin filenames have the following naming scheme @@ -311,7 +412,7 @@ def gather(self): binary = 0 missing_ahl = self.o.MissingAMHeaders - + # Fill in temporary filename for the timebeing filename = bulletinHeader + '__' + f"{randint(self.minnum, self.maxnum)}".zfill(len(str(self.maxnum))) filepath = self.o.directory + os.sep + filename @@ -321,21 +422,14 @@ def gather(self): # From sundew source code if lines[1][:4] in self.o.binaryInitialCharacters: binary = 1 - - # Ported from Sundew. Complete missing headers from bulletins starting with the first characters below. 
- if firstchars in [ "CA", "RA", "MA" ]: - - logger.debug("Adding missing headers in file contents") - - lines[0] += missing_ahl.encode(charset) - - # Reconstruct the bulletin - new_bulletin = b'' - for i in lines: - new_bulletin += i + b'\n' - bulletin = new_bulletin - - logger.debug("Missing contents added") + + # Correct the bulletin contents, the Sundew way + if not binary: + station = lines[1].split()[0].decode(charset) + new_bulletin = self.correctContents(bulletin, firstchars, lines, missing_ahl, station, charset) + if new_bulletin != b'': + bulletin = new_bulletin + except Exception as e: logger.error(f"Unable to add AHL headers. Error message: {e}") @@ -379,6 +473,10 @@ def gather(self): ident.update(bulletin) msg['identity'] = {'method':self.o.identity_method, 'value':ident.value} + # Call renamer + msg = self.renamer.rename(msg) + if msg == None: + continue logger.debug(f"New sarracenia message: {msg}") newmsg.append(msg) diff --git a/sarracenia/flowcb/log.py b/sarracenia/flowcb/log.py index e49aa8c51..f562916d1 100755 --- a/sarracenia/flowcb/log.py +++ b/sarracenia/flowcb/log.py @@ -96,6 +96,8 @@ def _messageAcceptStr(self,msg): s+= f"relPath: {msg['relPath']} " if 'retrievePath' in msg: s+= f"retrievePath: {msg['retrievePath']} " + if 'rename' in msg: + s+= f"rename: {msg['rename']} " return s def _messagePostStr(self,msg): @@ -124,6 +126,8 @@ def _messagePostStr(self,msg): s+= f"relPath: {msg['relPath']} " if 'retrievePath' in msg: s+= f"retrievePath: {msg['retrievePath']} " + if 'rename' in msg: + s+= f"rename: {msg['rename']} " return s def after_accept(self, worklist): diff --git a/sarracenia/flowcb/poll/copernicus_marine_s3.py b/sarracenia/flowcb/poll/copernicus_marine_s3.py new file mode 100644 index 000000000..0bdf9b4f1 --- /dev/null +++ b/sarracenia/flowcb/poll/copernicus_marine_s3.py @@ -0,0 +1,236 @@ +""" +Polls the Copernicus Marine Data Store STAC API and S3 buckets. 
+---------------------------------------------------------------- + +Based on https://github.com/MetPX/sarracenia/blob/development/sarracenia/flowcb/poll/s3bucket.py + +This was developed because the software provided by Copernicus, the Copernicus Marine Toolbox, requires +Python >= 3.9 and seemed difficult to integrate into Sarracenia data flows. We also prefer to not install +packages on our servers using pip when possible. +This plugin lets us find URLs in a normal Sarracenia poll and apply accept/reject filtering to narrow down +the files we want. Duplicate suppression can also be used. + +Documentation from Copernicus Marine: + +https://help.marine.copernicus.eu/en/collections/4060068-copernicus-marine-toolbox +https://marine.copernicus.eu/news/introducing-new-copernicus-marine-data-store +https://help.marine.copernicus.eu/en/articles/8612591-switching-from-current-to-new-services +https://marine.copernicus.eu/news/unveiling-exciting-updates-copernicus-marine-service-november-2023-release +https://pypi.org/project/copernicusmarine/ + +NOTE: No authentication is currently used, and it doesn't seem to be necessary right now. This might need +to be updated in the future if they require authentication. + +Additional filtering can be performed on datasets for a productID. +Add dataset_href=some_regex to the end of a productID to include only datasets with hrefs that match the regex. + +Example Config: +^^^^^^^^^^^^^^^ + +:: + + callback poll.copernicus_marine_s3 + + # This is the base URL for the Copernicus Marine STAC API + pollUrl https://stac.marine.copernicus.eu/metadata + + productID INSITU_GLO_PHYBGCWAV_DISCRETE_MYNRT_013_030 dataset_href=.*latest.* + productID SEALEVEL_GLO_PHY_L3_NRT_008_044 + productID SEALEVEL_GLO_PHY_L4_NRT_008_046 + + # post_baseUrl will be overridden by the plugin. No need to set it in the config. 
+ +""" +import boto3 +import botocore +import logging +import paramiko +import sarracenia +import urllib.parse +import requests +import re + +logger = logging.getLogger(__name__) + +class Copernicus_marine_s3(sarracenia.flowcb.FlowCB): + def __init__(self, options): + super().__init__(options, logger) + + # Allow setting a logLevel *only* for this plugin in the config file: + # set poll.copernicus_marine_s3.logLevel debug + if hasattr(self.o, 'logLevel'): + logger.setLevel(self.o.logLevel.upper()) + logger.debug(f"logLevel {self.o.logLevel.upper()}") + + self.o.add_option('productID', kind='list', default_value=[]) + + self.stac_base_url = self.o.pollUrl + if self.stac_base_url[-1] != '/': + self.stac_base_url += '/' + + # Parse productIDs + self.productIDs = {} + for product_id in self.o.productID: + parts = product_id.split(' ') + name = parts[0] + try: + self.productIDs[name] = None + if len(parts) > 1: + regex = parts[1].split("dataset_href=")[1].strip() + self.productIDs[name] = re.compile(regex) + except Exception as e: + logger.error(f"Invalid productID {product_id} - check the config file!") + logger.debug("Exception details:", exc_info=True) + + self.botocore_config = botocore.config.Config(s3={"addressing_style": "virtual"}, + signature_version=botocore.UNSIGNED) + + def get_s3_urls_from_stac(self, productIDs): + """ Poll the STAC API to find S3 dataset URLs for each ProductID + """ + s3_urls = {} + for id in productIDs: + try: + resp = requests.get(self.stac_base_url + id + '/product.stac.json') + resp.raise_for_status() + + product_page = resp.json() + datasets = set() + for link in product_page['links']: + if 'dataset.stac.json' in link['href']: + if productIDs[id]: # if there's a regex filter for this productID + if productIDs[id].match(link['href']): + datasets.add(link['href']) + else: + logger.debug(f"{link['href']} doesn't match {productIDs[id]}, ignoring") + else: # no regex, no need to filter + datasets.add(link['href']) + + if len(datasets) == 
0: + logger.error(f"Failed to find dataset/collection link for productID {id}") + continue # keep trying other productIDs + + for dataset in datasets: + resp = requests.get(self.stac_base_url + id + '/' + dataset) + if not resp: + logger.error(f"Failed to get info for {dataset}") + continue + # Get the s3 URL from the assets section + dataset_page = resp.json() + if 'native' in dataset_page['assets']: + if id not in s3_urls: + s3_urls[id] = [] + s3_urls[id].append(dataset_page['assets']['native']['href']) + else: + logger.error(f"Failed to find Native dataset S3 URL for productID {id} + dataset {dataset}") + logger.debug(f"dataset page: {self.stac_base_url + id + '/' + dataset}") + + except Exception as e: + logger.error(f"Could not poll productID {id} ({e})") + logger.debug(f"Exception:", exc_info=True) + + logger.debug(f"STAC poll found {s3_urls}") + return s3_urls + + def _identify_client(self, model, params, request_signer, **kwargs): + """ Tell Copernicus who we are. + """ + # TODO: They also use x-cop-user = username, but we're not using a user account right now. If authentication + # is required in the future, we should set it. + try: + ident = {"x-cop-client": 'Sarracenia' + sarracenia.__version__} + for item in ident: + params['headers'][item] = ident[item] + # The Copernicus Marine Toolbox client sets URL parameters + params['query_string'][item] = ident[item] + params['url'] += urllib.parse.quote(f'&{item}={ident[item]}', safe='/&=') + + if 'User-Agent' in params['headers']: + params['headers']['User-Agent'] = 'Sarracenia' + sarracenia.__version__ + ' ' + params['headers']['User-Agent'] + + logger.debug(f"request: {model}, params: {params}, request_signer: {request_signer}, kwargs: {kwargs}") + except: + # Don't really care if this fails, something wrong in this method shouldn't stop the poll from working + logger.debug('Exception setting identification', exc_info=True) + + def poll(self): + """ Do the poll. 
First use their STAC API to find which S3 buckets and endpoints we need to poll for each + Product ID defined in the config. Then actually poll the S3 buckets. + """ + gathered_msgs = [] + s3_urls = self.get_s3_urls_from_stac(self.productIDs) + + # EXAMPLE URL: + # endpoint | bucket | prefix + # https://s3.waw3-1.cloudferro.com/mdl-native-07/native/SEALEVEL_GLO_PHY_L3_NRT_008_044/cmems_obs-sl_glo_phy-ssh_nrt_h2b-l3-duacs_PT0.2S_202311 + # Get which buckets and prefixes to poll for each endpoint + bucket_prefix_by_endpoint = {} + for id in s3_urls: + for url in s3_urls[id]: + pr = urllib.parse.urlparse(url) + endpoint = pr.scheme + '://' + pr.netloc + bucket = pr.path.strip('/').split('/')[0] + prefix = '/'.join(pr.path.strip('/').split('/')[1:]) + if endpoint not in bucket_prefix_by_endpoint: + bucket_prefix_by_endpoint[endpoint] = [] + bucket_prefix_by_endpoint[endpoint].append({'bucket':bucket, 'prefix':prefix}) + + logger.debug(f"Going to S3 list {bucket_prefix_by_endpoint}") + + # Have a bunch of prefixes now (directories), poll them to find files + objects_by_endpoint_bucket = {} + for endpoint in bucket_prefix_by_endpoint: + objects_by_endpoint_bucket[endpoint] = {} + try: + s3 = boto3.client("s3", config=self.botocore_config, endpoint_url=endpoint) + # Tell Copernicus who we are, for their monitoring + s3.meta.events.register("before-call.s3.ListObjects", self._identify_client) + for bucket_prefix in bucket_prefix_by_endpoint[endpoint]: + logger.info(f"\bing s3://{bucket_prefix['bucket']}/{bucket_prefix['prefix']} @ endpoint {endpoint}") + # s3.list_objects returns a maximum of 1000 items, need to use paginator instead + operation = 'list_objects_v2' if 'list_objects_v2' in dir(s3) else 'list_objects' + paginator = s3.get_paginator(operation) + page_iterator = paginator.paginate(Bucket=bucket_prefix['bucket'], Prefix=bucket_prefix['prefix']) + for page in page_iterator: + if 'Contents' in page: + for obj in page['Contents']: + if bucket_prefix['bucket'] not 
in objects_by_endpoint_bucket[endpoint]: + objects_by_endpoint_bucket[endpoint][bucket_prefix['bucket']] = [] + objects_by_endpoint_bucket[endpoint][bucket_prefix['bucket']].append(obj) + if self.stop_requested: + break + if self.stop_requested: + break + except Exception as e: + logger.error(f"Error during S3 poll for endpoint {endpoint} ({e})") + logger.debug(f"Exception:", exc_info=True) + + # Build a message for each object we found + for endpoint in objects_by_endpoint_bucket: + for bucket in objects_by_endpoint_bucket[endpoint]: + for obj in objects_by_endpoint_bucket[endpoint][bucket]: + stat = paramiko.SFTPAttributes() + if 'LastModified' in obj: + t = obj["LastModified"].timestamp() + stat.st_atime = t + stat.st_mtime = t + if 'Size' in obj: + stat.st_size = obj['Size'] + + file_path = bucket + '/' + obj['Key'] + msg = sarracenia.Message.fromFileInfo(file_path, self.o, stat) + # The (new_)baseUrl field will be set to the post_baseUrl from the config, or pollUrl if + # post_baseUrl is not set. We need to override it here, because the baseUrl can change if the + # files are coming from different endpoints. + msg['baseUrl'] = endpoint + msg['new_baseUrl'] = endpoint + # When Sarracenia runs updatePaths again later, from sarracenia.Flow, self.o.post_baseUrl will be + # different, so set msg['post_baseUrl'] here to override whatever setting it has at that point. 
+ msg['post_baseUrl'] = endpoint + msg['_deleteOnPost'] |= {'post_baseUrl'} + + gathered_msgs.append(msg) + + logger.info(f"found {len(gathered_msgs)} files, Sarracenia will filter them") + return gathered_msgs + diff --git a/sarracenia/flowcb/rename/raw2bulletin.py b/sarracenia/flowcb/rename/raw2bulletin.py index bb356d636..3184fccf9 100644 --- a/sarracenia/flowcb/rename/raw2bulletin.py +++ b/sarracenia/flowcb/rename/raw2bulletin.py @@ -14,6 +14,7 @@ Decoding of the data is done in the same way of the encoder in flowcb/gather/am.py + Examples: RAW Ninjo file (4 letter station ID) @@ -37,22 +38,35 @@ Output filename: CACN00_CWAO_141600_PQU__00003 + A ISA binary bulletin + Input filename: ISAA41_CYZX_162000__00035 + + Contents: + ISAA41_CYZX_162000 + BUFR + + Output filename: ISAA41_CYZX_162000___00004 + Usage: - callback rename.raw2bulletin + callback rename.raw2bulletin + --- OR (inside callback) --- + from sarracenia.flowcb.rename.raw2bulletin import Raw2bulletin + def __init__(): + super().__init__(options,logger) + self.renamer = Raw2bulletin(self.o) + Contributions: Andre LeBlanc - First author (2024/02) Improvements: Delegate some of the generalized methods to a parent class. To be callable by other plugins. - Add more Sundew logic if ever some bulletins end up failing when implemented """ from sarracenia.flowcb import FlowCB +from sarracenia.bulletin import Bulletin import logging -from base64 import b64encode -import time, datetime -import subprocess +import datetime logger = logging.getLogger(__name__) @@ -61,300 +75,93 @@ class Raw2bulletin(FlowCB): def __init__(self,options) : super().__init__(options,logger) self.seq = 0 + self.binary = 0 + self.bulletinHandler = Bulletin() # Need to redeclare these options to have their default values be initialized. 
self.o.add_option('inputCharset', 'str', 'utf-8') self.o.add_option('binaryInitialCharacters', 'list', [b'BUFR' , b'GRIB', b'\211PNG']) # If file was converted, get rid of extensions it had - def after_accept(self,worklist): - - good_msgs = [] - - for msg in worklist.incoming: - - path = msg['new_dir'] + '/' + msg['new_file'] - - filenameFirstChars = msg['new_file'].split('_')[0] - - # AM bulletins that need their filename rewritten with data should only have two chars before the first underscore - # This is in concordance with Sundew logic -> https://github.com/MetPX/Sundew/blob/main/lib/bulletinAm.py#L70-L71 - # These messages are still good, so we will add them to the good_msgs list - if len(filenameFirstChars) != 2: - good_msgs.append(msg) - continue - - data = self.getData(msg, path) - - if data == None: - worklist.rejected.append(msg) - continue - - lines = data.split('\n') - #first_line = lines[0].strip('\r') - #first_line = first_line.strip(' ') - #first_line = first_line.strip('\t') - first_line = lines[0].split(' ') - - # Build header from bulletin - header = self.buildHeader(first_line) - if header == None: - logger.error("Unable to fetch header contents. 
Skipping message") - worklist.rejected.append(msg) - continue - - # Get the station timestamp from bulletin - ddhhmm = self.getTime(data) - if ddhhmm == None: - logger.error("Unable to get julian time.") - - # Get the BBB from bulletin - BBB = self.getBBB(first_line) - - # Get the station ID from bulletin - stn_id = self.getStation(data) - - # Generate a sequence (random ints) - seq = self.getSequence() - - # Rename file with data fetched - try: - # We can't disseminate bulletins downstream if they're missing the timestamp, but we want to keep the bulletins to troubleshoot source problems - # We'll append "_PROBLEM" to the filename to be able to identify erronous bulletins - if ddhhmm == None: - timehandler = datetime.datetime.now() + def rename(self,msg): - # Add current time as new timestamp to filename - new_file = header + "_" + timehandler.strftime('%d%H%M') + "_" + BBB + "_" + stn_id + "_" + seq + "_PROBLEM" + path = msg['new_dir'] + '/' + msg['new_file'] - # Write the file manually as the messages don't get posted downstream. 
- # The message won't also get downloaded further downstream - msg['new_file'] = new_file - new_path = msg['new_dir'] + '/' + msg['new_file'] + data = self.bulletinHandler.getData(msg, path) - with open(new_path, 'w') as f: f.write(data) - - logger.error(f"New filename (for problem file): {new_file}") - raise Exception - else: - new_file = header + "_" + ddhhmm + "_" + BBB + "_" + stn_id + "_" + seq - - msg['new_file'] = new_file - new_path = msg['new_dir'] + '/' + msg['new_file'] + # AM bulletins that need their filename rewritten with data should only have two chars before the first underscore + # This is in concordance with Sundew logic -> https://github.com/MetPX/Sundew/blob/main/lib/bulletinAm.py#L70-L71 + # These messages are still good, so we will add them to the good_msgs list + # if len(filenameFirstChars) != 2 and self.binary: + # good_msgs.append(msg) + # continue - logger.info(f"New filename (with path): {new_path}") - - good_msgs.append(msg) - - except Exception as e: - logger.error(f"Error in renaming. Error message: {e}") - worklist.rejected.append(msg) - continue - - worklist.incoming = good_msgs - - - def getData(self, msg, path): - """Get the bulletin data. - We can either get the bulletin data via - 1. Sarracenia message content - 2. Locally downloaded file - """ - - # Read file data from message or from file path directly if message content not found. - try: - - binary = 0 - if msg['content']: - data = msg['content']['value'] - else: - - fp = open(path, 'rb') - data = fp.read() - # bulletin = Bulletin(data) - fp.close() - - # Decode data, binary and text. Integrate inputCharset - if data.splitlines()[1][:4] in self.o.binaryInitialCharacters: - binary = 1 - - if not binary: - data = data.decode(self.o.inputCharset) - else: - data = b64encode(data).decode('ascii') - - return data - - except Exception as e: - logger.error(f"Could not fetch file data of from either message content or {path}. 
Error details: {e}") + if data == None: return None - - - def getSequence(self): - """ sequence number to make the file unique... - """ - self.seq = self.seq + 1 - if self.seq > 99999: - self.seq = 1 - return str(self.seq).zfill(5) - - - def getStation(self, data): - """Extracted from Sundew code: https://github.com/MetPX/Sundew/blob/main/lib/bulletin.py#L327-L408 - Get the station ID from the bulletin contents. - Examples: - CACN00 CWAO -> Station ID located on second line. - FTCN32 CWAO -> Station ID located on first line (with header) - """ - - station = '' - data = data.lstrip('\n') - data = data.split('\n') - - try: - premiereLignePleine = "" - deuxiemeLignePleine = "" - - # special case, need to get the next full line. - i = 0 - for ligne in data[1:]: - i += 1 - premiereLignePleine = ligne - if len(premiereLignePleine) > 1: - if len(data) > i+1 : deuxiemeLignePleine = data[i+1] - break - - #print " ********************* header = ", data[0][0:7] - # switch depends on bulletin type. 
- if data[0][0:2] == "SA": - if data[1].split()[0] in ["METAR","LWIS"]: - station = premiereLignePleine.split()[1] - else: - station = premiereLignePleine.split()[0] - - elif data[0][0:2] == "SP": - station = premiereLignePleine.split()[1] - - elif data[0][0:2] in ["SI","SM"]: - station = premiereLignePleine.split()[0] - if station == "AAXX" : - if deuxiemeLignePleine != "" : - station = deuxiemeLignePleine.split()[0] - else : - station = '' - - elif data[0][0:6] in ["SRCN40","SXCN40","SRMT60","SXAK50", "SRND20", "SRND30"]: - #elif data[0][0:6] in self.wmo_id: - station = premiereLignePleine.split()[0] - - elif data[0][0:2] in ["FC","FT"]: - if premiereLignePleine.split()[1] == "AMD": - station = premiereLignePleine.split()[2] - else: - station = premiereLignePleine.split()[1] - - elif data[0][0:2] in ["UE","UG","UK","UL","UQ","US"]: - parts = premiereLignePleine.split() - if parts[0][:2] in ['EE', 'II', 'QQ', 'UU']: - station = parts[1] - elif parts[0][:2] in ['PP', 'TT']: - station = parts[2] - else: - station = '' - - elif data[0][0:2] in ["RA","MA","CA"]: - station = premiereLignePleine.split()[0].split('/')[0] - - except Exception: - station = '' - if station != '' : - while len(station) > 1 and station[0] == '?' : - station = station[1:] - if station[0] != '?' : - station = station.split('?')[0] - if station[-1] == '=' : station = station[:-1] - else : - station = '' - - return station - - - def getBBB(self, first_line): - """Get the BBB. If none found, return empty string. - The BBB is the field of the bulletin header that states if it was amended or not. - """ - - if len(first_line) != 4: - BBB = '' + lines = data.split('\n') + #first_line = lines[0].strip('\r') + #first_line = first_line.strip(' ') + #first_line = first_line.strip('\t') + first_line = lines[0].split(' ') + + # Build header from bulletin + header = self.bulletinHandler.buildHeader(first_line) + if header == None: + logger.error("Unable to fetch header contents. 
Skipping message") + return None + + # Get the station timestamp from bulletin + if len(header.split('_')) == 2: + ddhhmm = self.bulletinHandler.getTime(data) + if ddhhmm == None: + logger.error("Unable to get julian time.") else: - BBB = first_line[3] - - return BBB - - def buildHeader(self, first_line): - """ Build header from file contents - """ - - try: - T1T2A1A2ii = first_line[0] - CCCC = first_line[1] - # YYGGgg = parts[2] - - header = T1T2A1A2ii + "_" + CCCC # + "_" + YYGGgg - - except Exception: - header = None + ddhhmm = '' + + # Get the BBB from bulletin + BBB = self.bulletinHandler.getBBB(first_line) - return header + # Get the station ID from bulletin + stn_id = self.bulletinHandler.getStation(data) + # Generate a sequence (random ints) + seq = self.bulletinHandler.getSequence() - def getTime(self, data): - """ extract time from the data of the ca station - the data's first line looks like this : x,yyyy,jul,hhmm,... - where x is an integer of no importance, followed by obs'time - yyyy = year - jul = julian day - hhmm = hour and mins - """ + + # Rename file with data fetched try: - parts = data.split(',') - - if len(parts) < 4: return None + # We can't disseminate bulletins downstream if they're missing the timestamp, but we want to keep the bulletins to troubleshoot source problems + # We'll append "_PROBLEM" to the filename to be able to identify erronous bulletins + if ddhhmm == None: + timehandler = datetime.datetime.now() - year = parts[1] - jul = parts[2] - hhmm = parts[3] + # Add current time as new timestamp to filename + new_file = header + "_" + timehandler.strftime('%d%H%M') + "_" + BBB + "_" + stn_id + "_" + seq + "_PROBLEM" - # passe-passe pour le jour julien en float parfois ? - f = float(jul) - i = int(f) - jul = '%s' % i - # fin de la passe-passe + # Write the file manually as the messages don't get posted downstream. 
+ # The message won't also get downloaded further downstream + msg['new_file'] = new_file + new_path = msg['new_dir'] + '/' + msg['new_file'] - # strange 0 filler + # with open(new_path, 'w') as f: f.write(data) - while len(hhmm) < 4: - hhmm = '0' + hhmm - while len(jul) < 3: - jul = '0' + jul + logger.error(f"New filename (for problem file): {new_file}") + elif ddhhmm == '': + new_file = header + "_" + BBB + "_" + stn_id + "_" + seq + else: + new_file = header + "_" + ddhhmm + "_" + BBB + "_" + stn_id + "_" + seq - # problematic 2400 for 00z + msg['new_file'] = new_file + # We need the rest of the fields to be also updated + del(msg['relPath']) + msg.updatePaths(self.o, msg['new_dir'], msg['new_file']) - if hhmm != '2400': - emissionStr = year + jul + hhmm - timeStruct = time.strptime(emissionStr, '%Y%j%H%M') - ddHHMM = time.strftime("%d%H%M", timeStruct) - return ddHHMM + logger.info(f"New filename (with path): {msg['relPath']}") - # sometime hhmm is 2400, to avoid exception - # set time to 00, increase by 24 hr + return msg - jul00 = year + jul + '0000' - timeStruct = time.strptime(jul00, '%Y%j%H%M') - ep_emission = time.mktime(timeStruct) + 24 * 60 * 60 - timeStruct = time.localtime(self.ep_emission) - ddHHMM = time.strftime('%d%H%M', timeStruct) - return ddHHMM except Exception as e: - return None + logger.error(f"Error in renaming. Error message: {e}") + + return None \ No newline at end of file diff --git a/sarracenia/moth/amqp.py b/sarracenia/moth/amqp.py index 15a71a6ca..fe8fa5141 100755 --- a/sarracenia/moth/amqp.py +++ b/sarracenia/moth/amqp.py @@ -34,6 +34,7 @@ from sarracenia.postformat import PostFormat from sarracenia.moth import Moth import signal +import os import time from urllib.parse import unquote @@ -119,6 +120,7 @@ def _msgRawToDict(self, raw_msg) -> sarracenia.Message: logger.error( 'ignoring message. UTF8 encoding expected. 
raw message received: %s' % ex) logger.debug('Exception details: ', exc_info=True) + self.channel.basic_ack( raw_msg.delivery_info['delivery_tag']) return None if 'content_type' in raw_msg.properties: @@ -129,6 +131,7 @@ def _msgRawToDict(self, raw_msg) -> sarracenia.Message: msg = PostFormat.importAny( body, raw_msg.headers, content_type, self.o ) if not msg: logger.error('Decode failed, discarding message') + self.channel.basic_ack( raw_msg.delivery_info['delivery_tag']) return None topic = raw_msg.delivery_info['routing_key'].replace( @@ -237,8 +240,8 @@ def __connect(self, broker) -> bool: # check for amqp 1.3.3 and 1.4.9 because connect doesn't exist in those older versions self.connection.connect() - self.channel = self.connection.channel() - self.management_channel = self.connection.channel() + self.management_channel = self.connection.channel(1) + self.channel = self.connection.channel(2) return True def _amqp_setup_signal_handler(self, signum, stack): @@ -252,7 +255,7 @@ def metricsReport(self): next_time = self.last_qDeclare + 30 now=time.time() if next_time <= now: - self._queueDeclare(passive=True) + #self._queueDeclare(passive=True) self.last_qDeclare=now super().metricsReport() @@ -404,7 +407,7 @@ def getSetup(self) -> None: signal.signal(signal.SIGINT, original_sigint) signal.signal(signal.SIGTERM, original_sigterm) if self.please_stop: - signal.raise_signal(signal.SIGINT) + os.kill(os.getpid(), signal.SIGINT) def putSetup(self) -> None: @@ -478,7 +481,7 @@ def putSetup(self) -> None: signal.signal(signal.SIGINT, original_sigint) signal.signal(signal.SIGTERM, original_sigterm) if self.please_stop: - signal.raise_signal(signal.SIGINT) + os.kill(os.getpid(), signal.SIGINT) def putCleanUp(self) -> None: diff --git a/sarracenia/moth/mqtt.py b/sarracenia/moth/mqtt.py index 278a447e1..5afa1d21b 100755 --- a/sarracenia/moth/mqtt.py +++ b/sarracenia/moth/mqtt.py @@ -34,6 +34,7 @@ from sarracenia.postformat import PostFormat from sarracenia.moth import Moth 
import signal +import os import ssl import threading import time @@ -417,7 +418,7 @@ def getSetup(self): signal.signal(signal.SIGINT, original_sigint) signal.signal(signal.SIGTERM, original_sigterm) if self.please_stop: - signal.raise_signal(signal.SIGINT) + os.kill(os.getpid(), signal.SIGINT) @@ -498,7 +499,7 @@ def putSetup(self): signal.signal(signal.SIGINT, original_sigint) signal.signal(signal.SIGTERM, original_sigterm) if self.please_stop: - signal.raise_signal(signal.SIGINT) + os.kill(os.getpid(), signal.SIGINT) def __sub_on_message(client, userdata, msg): diff --git a/sarracenia/sr.py b/sarracenia/sr.py index 42e35e5d2..60bd06dde 100755 --- a/sarracenia/sr.py +++ b/sarracenia/sr.py @@ -1353,7 +1353,7 @@ def declare(self): u_url.username, u_url.password, self.options.dry_run ) # declare admin exchanges. - if hasattr(self,'default_cfg'): + if hasattr(self,'default_cfg') and self.default_cfg.admin: logger.info( f"Declaring exchnges for admin.conf using {self.default_cfg.admin} ") if hasattr(self.default_cfg, 'declared_exchanges'): xdc = sarracenia.moth.Moth.pubFactory( @@ -1717,21 +1717,22 @@ def cleanup(self): if self.please_stop: break cache_dir = self.user_cache_dir + os.sep + f.replace('/', os.sep) - for state_file in os.listdir(cache_dir): - if self.please_stop: - break - if state_file[0] == '.': - continue + if os.path.isdir(cache_dir): + for state_file in os.listdir(cache_dir): + if self.please_stop: + break + if state_file[0] == '.': + continue - if state_file in [ 'disabled' ]: - continue + if state_file in [ 'disabled' ]: + continue - asf = cache_dir + os.sep + state_file - if self.options.dry_run: - print('removing state file (dry run): %s' % asf) - else: - print('removing state file: %s' % asf) - os.unlink(asf) + asf = cache_dir + os.sep + state_file + if self.options.dry_run: + print('removing state file (dry run): %s' % asf) + else: + print('removing state file: %s' % asf) + os.unlink(asf) print_column = 0 @@ -2806,7 +2807,7 @@ def 
overview(self): elif len(status['running']) == (len(self.configs[c]) - len(status['disabled'])): print('%-10s %-10s %-6s %-3d %s' % (c, 'most', 'OKd', \ - (len(self.configs[c]) - len(status['disabled']), ', '.join(status['running'] ))) ) + len(self.configs[c]) - len(status['disabled']), ', '.join(status['running'] ))) else: print('%-10s %-10s %-6s %3d' % (c, 'mixed', 'mult', len(self.configs[c])))