From 949874161152da347dd978c994a67d58fc857134 Mon Sep 17 00:00:00 2001
From: Rebecca Pearce <17481621+beccapearce@users.noreply.github.com>
Date: Fri, 29 Nov 2024 01:53:34 +0000
Subject: [PATCH] Add rake task to remove uses of advisory govspeak

Creates 2 temporary rake tasks (each with a matching dry-run task) which:
- Find all the uses of the govspeak advisory component in edition body and html attachment body
- Change the uses of advisory into information callouts (as recommended in the [govspeak README](https://github.com/alphagov/govspeak/blob/main/README.md#advisory-deprecated-marked-for-removal-use-information-callouts-instead))
- Add these changes to a new draft of the edition and publish it as a minor update
- Output the successes and failures of the task with any errors.

We are removing the uses of advisory since it has been deprecated for [4 years](https://github.com/alphagov/govspeak/pull/195), and it will make our code easier to maintain.

Trello card: https://trello.com/c/0aGTHsml/2975-prep-dev-work-to-replace-uses-of-advisory-component-with-information-call-out
---
 lib/govspeak/embedded_content_patterns.rb     |   1 +
 lib/govspeak/remove_advisory_service.rb       | 125 +++++++++++++++++
 lib/tasks/remove_advisory.rake                | 127 ++++++++++++++++++
 .../govspeak/remove_advisory_service_test.rb  |  51 +++++++
 test/unit/lib/tasks/remove_advisory_test.rb   |  75 +++++++++++
 5 files changed, 379 insertions(+)
 create mode 100644 lib/govspeak/remove_advisory_service.rb
 create mode 100644 lib/tasks/remove_advisory.rake
 create mode 100644 test/unit/lib/govspeak/remove_advisory_service_test.rb
 create mode 100644 test/unit/lib/tasks/remove_advisory_test.rb

diff --git a/lib/govspeak/embedded_content_patterns.rb b/lib/govspeak/embedded_content_patterns.rb
index 784f51a4e8f..d938972e02f 100644
--- a/lib/govspeak/embedded_content_patterns.rb
+++ b/lib/govspeak/embedded_content_patterns.rb
@@ -4,5 +4,6 @@ module EmbeddedContentPatterns
     ADMIN_EDITION_PATH = 
%r{/admin/(?:#{Whitehall.edition_route_path_segments.join('|')})/(\d+)}
     ADMIN_ORGANISATION_CIP_PATH = %r{/admin/organisations/([\w-]+)/corporate_information_pages/(\d+)}
     ADMIN_WORLDWIDE_ORGANISATION_CIP_PATH = %r{/admin/worldwide_organisations/([\w-]+)/corporate_information_pages/(\d+)}
+    ADVISORY = /(^@)([\s\S]*?)(@?)(?=(?:^\$CTA|\r?\n\r?\n|^@|$))/m
   end
 end
diff --git a/lib/govspeak/remove_advisory_service.rb b/lib/govspeak/remove_advisory_service.rb
new file mode 100644
index 00000000000..7572de876da
--- /dev/null
+++ b/lib/govspeak/remove_advisory_service.rb
@@ -0,0 +1,125 @@
+module Govspeak
+  class RemoveAdvisoryService
+    attr_reader :body
+
+    def initialize(object, dry_run: true)
+      @object = object
+      @body = object.body || object.govspeak_content.body
+      @whodunnit = User.find_by(name: "GDS Inside Government Team")
+      @dry_run = dry_run
+    end
+
+    def process!
+      if @dry_run
+        matches = find_all_advisories(body)
+        puts "\n[DRY RUN] Advisory changes detected for #{@object.title}, (ID: #{@object.id}):"
+        puts "belongs to #{@object.attachable.title}" if @object.is_a?(HtmlAttachment)
+        puts "----------------------------------"
+        matches.each do |match|
+          puts "Old advisory:\n#{match[:old]}"
+          puts "New advisory:\n#{match[:new]}"
+          puts "----------------------------------"
+        end
+        return
+      end
+      if @object.is_a?(Edition)
+        AuditTrail.acting_as(@whodunnit) do
+          # Create a new draft of the edition
+          draft = @object.create_draft(@whodunnit)
+
+          # Replace advisories in the body of the edition
+          new_body = replace_all_advisories(body)
+
+          # Update the draft edition with the new body and set to minor change
+          draft.update!(
+            body: new_body,
+            minor_change: true,
+          )
+          submit_and_publish!(draft)
+        end
+      elsif @object.is_a?(HtmlAttachment)
+        AuditTrail.acting_as(@whodunnit) do
+          # Create a draft of the edition the attachment belongs to
+          draft = @object.attachable.create_draft(@whodunnit)
+
+          # Find the relevant attachment in the new draft
+          new_attachment = draft.html_attachments.find_by(slug: @object.slug)
+
+          # Replace advisories in the body of the new attachment
+          new_body = replace_all_advisories(new_attachment.body)
+          new_attachment.govspeak_content.update!(body: new_body)
+
+          # Set the owning draft edition to be a minor change
+          draft.update!(minor_change: true)
+          submit_and_publish!(draft)
+        end
+      else
+        raise "Unsupported object type: #{@object.class.name}"
+      end
+    end
+
+    def submit_and_publish!(draft)
+      # Submit the draft so it is ready to be published
+      draft.submit!
+
+      # Add a reason for force publishing
+      publish_reason = "Replacing deprecated advisory elements with information callouts"
+
+      # Publish the edition
+      edition_publisher = Whitehall.edition_services.publisher(draft, user: @whodunnit, remark: publish_reason)
+      edition_publisher.perform!
+    end
+
+    def replace_all_advisories(body_content)
+      match = advisory_match_group(body_content)
+      return body_content if match.nil?
+
+      new_body = replace_advisory_with_information_callout(match, body_content)
+      replace_all_advisories(new_body)
+    end
+
+    def advisory_match_group(body_content)
+      match_data = body_content.match(regexp_for_advisory_markup)
+      return unless match_data
+
+      {
+        opening_at: match_data[1],
+        content_after_at: match_data[2],
+        closing_at: match_data[3],
+        other_possible_line_ends: match_data[4],
+      }
+    end
+
+    def regexp_for_advisory_markup
+      opening_at = "(^@)"
+      content_after_at = '([\s\S]*?)'
+      closing_at = "(@?)"
+      other_possible_line_ends = '(?:^\$CTA|\r?\n\r?\n|^@|$)'
+      Regexp.new("#{opening_at}#{content_after_at}#{closing_at}(?=#{other_possible_line_ends})", Regexp::MULTILINE)
+    end
+
+    # Swaps the advisory described by +match+ for an information callout and
+    # returns the new body; +body_content+ itself is not mutated.
+    #
+    # The advisory pattern is re-applied with a block so only the first match is
+    # rewritten. A plain-string gsub also hit identical text elsewhere (a bare
+    # "@" advisory rewrote every "@") and expanded backslashes in the content.
+    def replace_advisory_with_information_callout(match, body_content)
+      body_content.sub(regexp_for_advisory_markup) { information_calloutify(match[:content_after_at]) }
+    end
+
+    def information_calloutify(string)
+      "^#{string}^"
+    end
+
+    def find_all_advisories(body_content)
+      matches = []
+      body_content.scan(regexp_for_advisory_markup) do |opening_at, content_after_at, closing_at|
+        old = closing_at.present? ? "#{opening_at}#{content_after_at}#{closing_at}" : "#{opening_at}#{content_after_at}"
+        new = information_calloutify(content_after_at)
+        matches << { old: old, new: new }
+      end
+      matches
+    end
+  end
+end
diff --git a/lib/tasks/remove_advisory.rake b/lib/tasks/remove_advisory.rake
new file mode 100644
index 00000000000..c4f2e9d4546
--- /dev/null
+++ b/lib/tasks/remove_advisory.rake
@@ -0,0 +1,127 @@
+namespace :remove_advisory do
+  desc "Process advisory govspeak in published editions"
+  task published_editions: :environment do
+    regex = Govspeak::EmbeddedContentPatterns::ADVISORY.to_s
+    successes = []
+    failures = []
+    published_content_containing_advisory_govspeak = []
+
+    puts "\nProcessing published editions...\n"
+
+    Edition
+      .where(state: "published")
+      .joins("RIGHT JOIN edition_translations ON edition_translations.edition_id = editions.id")
+      .where("body REGEXP ?", regex)
+      .find_each do |object|
+        published_content_containing_advisory_govspeak << object.document_id
+      end
+
+    published_content_containing_advisory_govspeak.each do |document_id|
+      edition = Document.find(document_id).latest_edition
+      Govspeak::RemoveAdvisoryService.new(edition, dry_run: false).process!
+      successes << edition.content_id
+      print "S"
+    rescue StandardError => e
+      failures << { content_id: edition&.content_id || "unknown (document #{document_id})", error: e.message } # edition is nil when the lookup itself raised
+      print "F"
+    end
+
+    summarize_results(successes, failures)
+  end
+
+  desc "Dry run to show which editions would have advisory govspeak processed"
+  task dry_run_published_editions: :environment do
+    regex = Govspeak::EmbeddedContentPatterns::ADVISORY.to_s
+
+    successes = []
+    failures = []
+    published_content_containing_advisory_govspeak = []
+
+    puts "\nStarting dry run of published editions...\n"
+
+    Edition
+      .where(state: "published")
+      .joins("RIGHT JOIN edition_translations ON edition_translations.edition_id = editions.id")
+      .where("body REGEXP ?", regex)
+      .find_each do |object|
+        published_content_containing_advisory_govspeak << object.document_id
+      end
+
+    published_content_containing_advisory_govspeak.each do |document_id|
+      edition = Document.find(document_id).latest_edition
+      Govspeak::RemoveAdvisoryService.new(edition, dry_run: true).process!
+      successes << edition.content_id
+      print "S"
+    rescue StandardError => e
+      failures << { content_id: edition&.content_id || "unknown (document #{document_id})", error: e.message } # edition is nil when the lookup itself raised
+      print "F"
+    end
+
+    summarize_results(successes, failures)
+  end
+
+  desc "Process advisory govspeak in published HTML attachments"
+  task published_html_attachments: :environment do
+    regex = Govspeak::EmbeddedContentPatterns::ADVISORY.to_s
+
+    successes = []
+    failures = []
+
+    puts "\nProcessing published HTML attachments...\n"
+
+    HtmlAttachment
+      .joins(:govspeak_content)
+      .where(deleted: false)
+      .where.not(attachable: nil)
+      .where("govspeak_contents.body REGEXP ?", regex)
+      .find_each do |attachment|
+        next if attachment.attachable.respond_to?(:state) && attachment.attachable.state != "published"
+
+        Govspeak::RemoveAdvisoryService.new(attachment, dry_run: false).process!
+        successes << attachment.content_id
+        print "S"
+      rescue StandardError => e
+        failures << { content_id: attachment.content_id, error: e.message }
+        print "F"
+      end
+
+    summarize_results(successes, failures)
+  end
+
+  desc "Dry run to show which HTML publications would have advisory govspeak processed"
+  task dry_run_published_html_attachments: :environment do
+    regex = Govspeak::EmbeddedContentPatterns::ADVISORY.to_s
+
+    successes = []
+    failures = []
+
+    puts "\nStarting dry run of published HTML attachments...\n"
+
+    HtmlAttachment
+      .joins(:govspeak_content)
+      .where(deleted: false)
+      .where.not(attachable: nil)
+      .where("govspeak_contents.body REGEXP ?", regex)
+      .find_each do |attachment|
+        next if attachment.attachable.respond_to?(:state) && attachment.attachable.state != "published"
+
+        Govspeak::RemoveAdvisoryService.new(attachment, dry_run: true).process!
+        successes << attachment.content_id
+        print "S"
+      rescue StandardError => e
+        failures << { content_id: attachment.content_id, error: e.message }
+        print "F"
+      end
+
+    summarize_results(successes, failures)
+  end
+end
+
+def summarize_results(successes, failures)
+  puts "\n\nSummary:\n"
+  puts "Successes: #{successes.count}"
+  puts "Failures: #{failures.count}"
+  failures.each do |failure|
+    puts "Failed Content ID: #{failure[:content_id]}, Error: #{failure[:error]}"
+  end
+end
diff --git a/test/unit/lib/govspeak/remove_advisory_service_test.rb b/test/unit/lib/govspeak/remove_advisory_service_test.rb
new file mode 100644
index 00000000000..e665a101100
--- /dev/null
+++ b/test/unit/lib/govspeak/remove_advisory_service_test.rb
@@ -0,0 +1,51 @@
+require "test_helper"
+
+class Govspeak::RemoveAdvisoryServiceTest < ActiveSupport::TestCase
+  test "advisory_match_group matches if the line begins with an @, and ends with a carriage return" do
+    body = "\r\n@ New online safety legislation is coming which will aim to reduce online harms.\r\n\r\n"
+    edition = create(:published_edition, body:)
+
+    expected = {
+      opening_at: "@",
+      content_after_at: " New online safety legislation is coming which will aim to reduce online harms.",
+      closing_at: "",
+      other_possible_line_ends: nil,
+    }
+    # The trailing lookahead is non-capturing, so the fourth group is always nil.
+    assert_equal expected, Govspeak::RemoveAdvisoryService.new(edition).advisory_match_group(body)
+  end
+
+  test "advisory_match_group matches if the line begins with an @, and ends with an @" do
+    markup = "\r\n@ New online safety legislation is coming which will aim to reduce online harms.@\r\n\r\n"
+    remover = Govspeak::RemoveAdvisoryService.new(create(:published_edition, body: markup))
+
+    expected_groups = {
+      opening_at: "@",
+      content_after_at: " New online safety legislation is coming which will aim to reduce online harms.",
+      closing_at: "@",
+      other_possible_line_ends: nil,
+    }
+
+    assert_equal expected_groups, remover.advisory_match_group(markup)
+  end
+
+  test "replace_all_advisories can replace a single advisory" do
+    edition = create(:published_edition, body: "\r\n@ New online safety legislation is coming which will aim to reduce online harms.@\r\n\r\n")
+    remover = Govspeak::RemoveAdvisoryService.new(edition)
+    callout_body = "\r\n^ New online safety legislation is coming which will aim to reduce online harms.^\r\n\r\n"
+
+    rewritten = remover.replace_all_advisories(edition.body)
+
+    assert_equal callout_body, rewritten
+  end
+
+  test "replace_all_advisories can replace multiple advisories" do
+    edition = create(:published_edition, body: "\r\n@ New online safety legislation is coming which will aim to reduce online harms.@\r\n\r\n@ And here's another. @\r\n\r\n")
+    remover = Govspeak::RemoveAdvisoryService.new(edition)
+    callout_body = "\r\n^ New online safety legislation is coming which will aim to reduce online harms.^\r\n\r\n^ And here's another. ^\r\n\r\n"
+
+    rewritten = remover.replace_all_advisories(edition.body)
+
+    assert_equal callout_body, rewritten
+  end
+end
diff --git a/test/unit/lib/tasks/remove_advisory_test.rb b/test/unit/lib/tasks/remove_advisory_test.rb
new file mode 100644
index 00000000000..ad73c114f19
--- /dev/null
+++ b/test/unit/lib/tasks/remove_advisory_test.rb
@@ -0,0 +1,75 @@
+require "test_helper"
+require "rake"
+
+class RemoveAdvisoryTasksTest < ActiveSupport::TestCase
+  teardown do
+    task_names = %w[remove_advisory:published_editions remove_advisory:published_html_attachments]
+    task_names.each { |task_name| Rake::Task[task_name].reenable }
+  end
+
+  test "published_editions processes editions with advisory" do
+    original = create(:published_edition, body: "@example@")
+    create(:gds_team_user, name: "GDS Inside Government Team")
+
+    Rake::Task["remove_advisory:published_editions"].invoke
+
+    # The task publishes a fresh edition, so read the document's latest one.
+    assert_match "^example^", original.document.latest_edition.body
+  end
+
+  test "published_editions processes editions with advisory followed by 2 empty lines" do
+    original = create(:published_edition, body: "@example\n\n")
+    create(:gds_team_user, name: "GDS Inside Government Team")
+
+    Rake::Task["remove_advisory:published_editions"].invoke
+
+    # The task publishes a fresh edition, so read the document's latest one.
+    assert_match "^example^\n\n", original.document.latest_edition.body
+  end
+
+  test "published_editions processes editions with advisory followed by call to action" do
+    original = create(:published_edition, body: "@example\n$CTA")
+    create(:gds_team_user, name: "GDS Inside Government Team")
+
+    Rake::Task["remove_advisory:published_editions"].invoke
+
+    # The task publishes a fresh edition, so read the document's latest one.
+    assert_match "^example^\n$CTA", original.document.latest_edition.body
+  end
+
+  test "published_html_attachments processes HTML attachments with plain advisory" do
+    owner = create(:published_edition)
+    attachment = create(:html_attachment, attachable: owner, body: "@example@")
+    create(:gds_team_user, name: "GDS Inside Government Team")
+
+    Rake::Task["remove_advisory:published_html_attachments"].invoke
+
+    # The rewritten attachment lives on the newly published edition.
+    republished = attachment.attachable.document.latest_edition
+    assert_match "^example^", republished.html_attachments.first.body
+  end
+
+  test "published_html_attachments processes HTML attachments with advisory followed by blank lines" do
+    owner = create(:published_edition)
+    attachment = create(:html_attachment, attachable: owner, body: "@example\n\n")
+    create(:gds_team_user, name: "GDS Inside Government Team")
+
+    Rake::Task["remove_advisory:published_html_attachments"].invoke
+
+    # The rewritten attachment lives on the newly published edition.
+    republished = attachment.attachable.document.latest_edition
+    assert_match "^example^", republished.html_attachments.first.body
+  end
+
+  test "published_html_attachments processes HTML attachments with advisory followed by call to action" do
+    owner = create(:published_edition)
+    attachment = create(:html_attachment, attachable: owner, body: "@example\n$CTA")
+    create(:gds_team_user, name: "GDS Inside Government Team")
+
+    Rake::Task["remove_advisory:published_html_attachments"].invoke
+
+    # The rewritten attachment lives on the newly published edition.
+    republished = attachment.attachable.document.latest_edition
+    assert_match "^example^", republished.html_attachments.first.body
+  end
+end