From 949874161152da347dd978c994a67d58fc857134 Mon Sep 17 00:00:00 2001
From: Rebecca Pearce <17481621+beccapearce@users.noreply.github.com>
Date: Fri, 29 Nov 2024 01:53:34 +0000
Subject: [PATCH] Add rake task to remove uses of advisory govspeak

Creates 2 temporary rake tasks (each with a dry-run counterpart) which:
- Finds all the uses of the govspeak advisory component in edition body
and html attachment body
- Changes the uses of advisory into information callouts (as recommended
in the [govspeak README](https://github.com/alphagov/govspeak/blob/main/README.md#advisory-deprecated-marked-for-removal-use-information-callouts-instead))
- Adds these changes to a new draft of the edition and publishes it as a
minor update
- Outputs the successes and failures of the task with any errors.

We are removing the uses of advisory since it has been deprecated for [4
years](https://github.com/alphagov/govspeak/pull/195), and it will make
our code easier to maintain.

Trello card: https://trello.com/c/0aGTHsml/2975-prep-dev-work-to-replace-uses-of-advisory-component-with-information-call-out
---
 lib/govspeak/embedded_content_patterns.rb     |   1 +
 lib/govspeak/remove_advisory_service.rb       | 125 +++++++++++++++++
 lib/tasks/remove_advisory.rake                | 127 ++++++++++++++++++
 .../govspeak/remove_advisory_service_test.rb  |  51 +++++++
 test/unit/lib/tasks/remove_advisory_test.rb   |  75 +++++++++++
 5 files changed, 379 insertions(+)
 create mode 100644 lib/govspeak/remove_advisory_service.rb
 create mode 100644 lib/tasks/remove_advisory.rake
 create mode 100644 test/unit/lib/govspeak/remove_advisory_service_test.rb
 create mode 100644 test/unit/lib/tasks/remove_advisory_test.rb

diff --git a/lib/govspeak/embedded_content_patterns.rb b/lib/govspeak/embedded_content_patterns.rb
index 784f51a4e8f..d938972e02f 100644
--- a/lib/govspeak/embedded_content_patterns.rb
+++ b/lib/govspeak/embedded_content_patterns.rb
@@ -4,5 +4,6 @@ module EmbeddedContentPatterns
     ADMIN_EDITION_PATH = %r{/admin/(?:#{Whitehall.edition_route_path_segments.join('|')})/(\d+)}
     ADMIN_ORGANISATION_CIP_PATH = %r{/admin/organisations/([\w-]+)/corporate_information_pages/(\d+)}
     ADMIN_WORLDWIDE_ORGANISATION_CIP_PATH = %r{/admin/worldwide_organisations/([\w-]+)/corporate_information_pages/(\d+)}
+    ADVISORY = /(^@)([\s\S]*?)(@?)(?=(?:^\$CTA|\r?\n\r?\n|^@|$))/m
   end
 end
diff --git a/lib/govspeak/remove_advisory_service.rb b/lib/govspeak/remove_advisory_service.rb
new file mode 100644
index 00000000000..7572de876da
--- /dev/null
+++ b/lib/govspeak/remove_advisory_service.rb
@@ -0,0 +1,125 @@
+module Govspeak
+  class RemoveAdvisoryService
+    attr_reader :body
+
+    def initialize(object, dry_run: true)
+      @object = object
+      @body = object.body || object.govspeak_content.body
+      @whodunnit = User.find_by(name: "GDS Inside Government Team")
+      @dry_run = dry_run
+    end
+
+    def process!
+      if @dry_run
+        matches = find_all_advisories(body)
+        puts "\n[DRY RUN] Advisory changes detected for #{@object.title}, (ID: #{@object.id}):"
+        puts "belongs to #{@object.attachable.title}" if @object.is_a?(HtmlAttachment)
+        puts "----------------------------------"
+        matches.each do |match|
+          puts "Old advisory:\n#{match[:old]}"
+          puts "New advisory:\n#{match[:new]}"
+          puts "----------------------------------"
+        end
+        return
+      end
+      if @object.is_a?(Edition)
+        AuditTrail.acting_as(@whodunnit) do
+          # Create a new draft of the edition
+          draft = @object.create_draft(@whodunnit)
+
+          # Replace advisories in the body of the edition
+          new_body = replace_all_advisories(body)
+
+          # Update the draft edition with the new body and set to minor change
+          draft.update!(
+            body: new_body,
+            minor_change: true,
+          )
+          submit_and_publish!(draft)
+        end
+      elsif @object.is_a?(HtmlAttachment)
+        AuditTrail.acting_as(@whodunnit) do
+          # Create a draft of the edition the attachment belongs to
+          draft = @object.attachable.create_draft(@whodunnit)
+
+          # Find the relevant attachment in the new draft
+          new_attachment = draft.html_attachments.find_by(slug: @object.slug)
+
+          # Replace advisories in the body of the new attachment
+          new_body = replace_all_advisories(new_attachment.body)
+          new_attachment.govspeak_content.update!(body: new_body)
+
+          # Set the owning draft edition to be a minor change
+          draft.update!(minor_change: true)
+          submit_and_publish!(draft)
+        end
+      else
+        raise "Unsupported object type: #{@object.class.name}"
+      end
+    end
+
+    def submit_and_publish!(draft)
+      # Submit the draft so it is ready to be published
+      draft.submit!
+
+      # Add a reason for force publishing
+      publish_reason = "Replacing deprecated advisory elements with information callouts"
+
+      # Publish the edition
+      edition_publisher = Whitehall.edition_services.publisher(draft, user: @whodunnit, remark: publish_reason)
+      edition_publisher.perform!
+    end
+
+    # Rewrites advisories one at a time until none are left; returns the new body.
+    def replace_all_advisories(body_content)
+      while (found = advisory_match_group(body_content))
+        body_content = replace_advisory_with_information_callout(found, body_content)
+      end
+      body_content
+    end
+
+    # Returns the captures of the first advisory found in body_content as a hash,
+    # or nil when nothing matches. The pattern has only three capture groups, so
+    # :other_possible_line_ends (group 4) is always nil.
+    def advisory_match_group(body_content)
+      found = regexp_for_advisory_markup.match(body_content)
+      return if found.nil?
+
+      keys = %i[opening_at content_after_at closing_at other_possible_line_ends]
+      # Capture groups are 1-based; index 0 is the whole match.
+      keys.each_with_index.to_h { |key, index| [key, found[index + 1]] }
+    end
+
+    def regexp_for_advisory_markup
+      opening_at = "(^@)"
+      content_after_at = '([\s\S]*?)'
+      closing_at = "(@?)"
+      other_possible_line_ends = '(?:^\$CTA|\r?\n\r?\n|^@|$)'
+      Regexp.new("#{opening_at}#{content_after_at}#{closing_at}(?=#{other_possible_line_ends})", Regexp::MULTILINE)
+    end
+
+    # Swaps the first line-start occurrence of the matched advisory for a callout.
+    # An anchored `sub` avoids rewriting the same text mid-line elsewhere (e.g. a
+    # lone "@" advisory vs. email addresses); the block keeps backslashes literal.
+    def replace_advisory_with_information_callout(match, body_content)
+      string_to_modify = match.values_at(:opening_at, :content_after_at, :closing_at).join
+      callout = information_calloutify(match[:content_after_at])
+
+      body_content.sub(/^#{Regexp.escape(string_to_modify)}/) { callout }
+    end
+
+    def information_calloutify(string)
+      "^#{string}^"
+    end
+
+    # Lists every advisory in body_content as { old:, new: } hashes (for dry runs).
+    def find_all_advisories(body_content)
+      body_content.scan(regexp_for_advisory_markup).map do |opening_at, content_after_at, closing_at|
+        {
+          old: "#{opening_at}#{content_after_at}#{closing_at}",
+          new: information_calloutify(content_after_at),
+        }
+      end
+    end
+  end
+end
diff --git a/lib/tasks/remove_advisory.rake b/lib/tasks/remove_advisory.rake
new file mode 100644
index 00000000000..c4f2e9d4546
--- /dev/null
+++ b/lib/tasks/remove_advisory.rake
@@ -0,0 +1,127 @@
+namespace :remove_advisory do
+  desc "Process advisory govspeak in published editions"
+  task published_editions: :environment do
+    regex = Govspeak::EmbeddedContentPatterns::ADVISORY.to_s
+    successes = []
+    failures = []
+
+    puts "\nProcessing published editions...\n"
+
+    # The join may return an edition once per matching translation row: de-duplicate.
+    document_ids = Edition
+      .where(state: "published")
+      .joins("RIGHT JOIN edition_translations ON edition_translations.edition_id = editions.id")
+      .where("body REGEXP ?", regex)
+      .find_each
+      .map(&:document_id)
+      .uniq
+
+    document_ids.each do |document_id|
+      edition = Document.find(document_id).latest_edition
+      Govspeak::RemoveAdvisoryService.new(edition, dry_run: false).process!
+      successes << edition.content_id
+      print "S"
+    rescue StandardError => e
+      failures << { content_id: edition&.content_id, error: e.message }
+      print "F"
+    end
+
+    summarize_results(successes, failures)
+  end
+
+  desc "Dry run to show which editions would have advisory govspeak processed"
+  task dry_run_published_editions: :environment do
+    regex = Govspeak::EmbeddedContentPatterns::ADVISORY.to_s
+
+    successes = []
+    failures = []
+
+    puts "\nStarting dry run of published editions...\n"
+
+    # The join may return an edition once per matching translation row: de-duplicate.
+    document_ids = Edition
+      .where(state: "published")
+      .joins("RIGHT JOIN edition_translations ON edition_translations.edition_id = editions.id")
+      .where("body REGEXP ?", regex)
+      .find_each
+      .map(&:document_id)
+      .uniq
+
+    document_ids.each do |document_id|
+      edition = Document.find(document_id).latest_edition
+      Govspeak::RemoveAdvisoryService.new(edition, dry_run: true).process!
+      successes << edition.content_id
+      print "S"
+    rescue StandardError => e
+      failures << { content_id: edition&.content_id, error: e.message }
+      print "F"
+    end
+
+    summarize_results(successes, failures)
+  end
+
+  desc "Process advisory govspeak in published HTML attachments"
+  task published_html_attachments: :environment do
+    regex = Govspeak::EmbeddedContentPatterns::ADVISORY.to_s
+
+    successes = []
+    failures = []
+
+    puts "\nProcessing published HTML attachments...\n"
+
+    HtmlAttachment
+      .joins(:govspeak_content)
+      .where(deleted: false)
+      .where.not(attachable: nil)
+      .where("govspeak_contents.body REGEXP ?", regex)
+      .find_each do |attachment|
+        next if attachment.attachable.respond_to?(:state) && attachment.attachable.state != "published"
+
+        Govspeak::RemoveAdvisoryService.new(attachment, dry_run: false).process!
+        successes << attachment.content_id
+        print "S"
+    rescue StandardError => e
+      failures << { content_id: attachment.content_id, error: e.message }
+      print "F"
+      end
+
+    summarize_results(successes, failures)
+  end
+
+  desc "Dry run to show which HTML publications would have advisory govspeak processed"
+  task dry_run_published_html_attachments: :environment do
+    regex = Govspeak::EmbeddedContentPatterns::ADVISORY.to_s
+
+    successes = []
+    failures = []
+
+    puts "\nStarting dry run of published HTML attachments...\n"
+
+    HtmlAttachment
+      .joins(:govspeak_content)
+      .where(deleted: false)
+      .where.not(attachable: nil)
+      .where("govspeak_contents.body REGEXP ?", regex)
+      .find_each do |attachment|
+        next if attachment.attachable.respond_to?(:state) && attachment.attachable.state != "published"
+
+        Govspeak::RemoveAdvisoryService.new(attachment, dry_run: true).process!
+        successes << attachment.content_id
+        print "S"
+    rescue StandardError => e
+      failures << { content_id: attachment.content_id, error: e.message }
+      print "F"
+      end
+
+    summarize_results(successes, failures)
+  end
+end
+
+def summarize_results(successes, failures)
+  puts "\n\nSummary:\n"
+  puts "Successes: #{successes.count}"
+  puts "Failures: #{failures.count}"
+  failures.each do |failure|
+    puts "Failed Content ID: #{failure[:content_id]}, Error: #{failure[:error]}"
+  end
+end
diff --git a/test/unit/lib/govspeak/remove_advisory_service_test.rb b/test/unit/lib/govspeak/remove_advisory_service_test.rb
new file mode 100644
index 00000000000..e665a101100
--- /dev/null
+++ b/test/unit/lib/govspeak/remove_advisory_service_test.rb
@@ -0,0 +1,51 @@
+require "test_helper"
+
+class Govspeak::RemoveAdvisoryServiceTest < ActiveSupport::TestCase
+  test "advisory_match_group matches if the line begins with an @, and ends with a carriage return" do
+    body = "\r\n@ New online safety legislation is coming which will aim to reduce online harms.\r\n\r\n"
+    edition = create(:published_edition, body:)
+
+    expected = {
+      opening_at: "@",
+      content_after_at: " New online safety legislation is coming which will aim to reduce online harms.",
+      closing_at: "",
+      other_possible_line_ends: nil,
+    }
+    service = Govspeak::RemoveAdvisoryService.new(edition)
+    assert_equal expected, service.advisory_match_group(body)
+  end
+
+  test "advisory_match_group matches if the line begins with an @, and ends with an @" do
+    body = "\r\n@ New online safety legislation is coming which will aim to reduce online harms.@\r\n\r\n"
+    edition = create(:published_edition, body:)
+
+    expected = {
+      opening_at: "@",
+      content_after_at: " New online safety legislation is coming which will aim to reduce online harms.",
+      closing_at: "@",
+      other_possible_line_ends: nil,
+    }
+    service = Govspeak::RemoveAdvisoryService.new(edition)
+    assert_equal expected, service.advisory_match_group(body)
+  end
+
+  test "replace_all_advisories can replace a single advisory" do
+    body = "\r\n@ New online safety legislation is coming which will aim to reduce online harms.@\r\n\r\n"
+    edition = create(:published_edition, body:)
+    service = Govspeak::RemoveAdvisoryService.new(edition)
+
+    expected = "\r\n^ New online safety legislation is coming which will aim to reduce online harms.^\r\n\r\n"
+
+    assert_equal expected, service.replace_all_advisories(edition.body)
+  end
+
+  test "replace_all_advisories can replace multiple advisories" do
+    body = "\r\n@ New online safety legislation is coming which will aim to reduce online harms.@\r\n\r\n@ And here's another. @\r\n\r\n"
+    edition = create(:published_edition, body:)
+    service = Govspeak::RemoveAdvisoryService.new(edition)
+
+    expected = "\r\n^ New online safety legislation is coming which will aim to reduce online harms.^\r\n\r\n^ And here's another. ^\r\n\r\n"
+
+    assert_equal expected, service.replace_all_advisories(edition.body)
+  end
+end
diff --git a/test/unit/lib/tasks/remove_advisory_test.rb b/test/unit/lib/tasks/remove_advisory_test.rb
new file mode 100644
index 00000000000..ad73c114f19
--- /dev/null
+++ b/test/unit/lib/tasks/remove_advisory_test.rb
@@ -0,0 +1,75 @@
+require "test_helper"
+require "rake"
+
+class RemoveAdvisoryTasksTest < ActiveSupport::TestCase
+  teardown do
+    Rake::Task["remove_advisory:published_editions"].reenable
+    Rake::Task["remove_advisory:published_html_attachments"].reenable
+  end
+
+  test "published_editions processes editions with advisory" do
+    edition = create(:published_edition, body: "@example@")
+    create(:gds_team_user, name: "GDS Inside Government Team")
+
+    Rake::Task["remove_advisory:published_editions"].invoke
+
+    new_edition = edition.document.latest_edition
+    assert_match "^example^", new_edition.body
+  end
+
+  test "published_editions processes editions with advisory followed by 2 empty lines" do
+    edition = create(:published_edition, body: "@example\n\n")
+    create(:gds_team_user, name: "GDS Inside Government Team")
+
+    Rake::Task["remove_advisory:published_editions"].invoke
+
+    new_edition = edition.document.latest_edition
+    assert_match "^example^\n\n", new_edition.body
+  end
+
+  test "published_editions processes editions with advisory followed by call to action" do
+    edition = create(:published_edition, body: "@example\n$CTA")
+    create(:gds_team_user, name: "GDS Inside Government Team")
+
+    Rake::Task["remove_advisory:published_editions"].invoke
+
+    new_edition = edition.document.latest_edition
+    assert_match "^example^\n$CTA", new_edition.body
+  end
+
+  test "published_html_attachments processes HTML attachments with plain advisory" do
+    edition = create(:published_edition)
+    attachment = create(:html_attachment, attachable: edition, body: "@example@")
+    create(:gds_team_user, name: "GDS Inside Government Team")
+
+    Rake::Task["remove_advisory:published_html_attachments"].invoke
+
+    new_edition = attachment.attachable.document.latest_edition
+    new_attachment = new_edition.html_attachments.first
+    assert_match "^example^", new_attachment.body
+  end
+
+  test "published_html_attachments processes HTML attachments with advisory followed by blank lines" do
+    edition = create(:published_edition)
+    attachment = create(:html_attachment, attachable: edition, body: "@example\n\n")
+    create(:gds_team_user, name: "GDS Inside Government Team")
+
+    Rake::Task["remove_advisory:published_html_attachments"].invoke
+
+    new_edition = attachment.attachable.document.latest_edition
+    new_attachment = new_edition.html_attachments.first
+    assert_match "^example^", new_attachment.body
+  end
+
+  test "published_html_attachments processes HTML attachments with advisory followed by call to action" do
+    edition = create(:published_edition)
+    attachment = create(:html_attachment, attachable: edition, body: "@example\n$CTA")
+    create(:gds_team_user, name: "GDS Inside Government Team")
+
+    Rake::Task["remove_advisory:published_html_attachments"].invoke
+
+    new_edition = attachment.attachable.document.latest_edition
+    new_attachment = new_edition.html_attachments.first
+    assert_match "^example^", new_attachment.body
+  end
+end