superdesk · devketanpro · Oct 19, 2023 · Oct 19, 2023 · Oct 19, 2023 · Oct 19, 2023
diff --git a/server/stt/common.py b/server/stt/common.py
@@ -6,27 +6,57 @@
 from superdesk import get_resource_service
 from superdesk.metadata.item import ITEM_TYPE, ITEM_STATE
 from planning.common import WORKFLOW_STATE, POST_STATE, update_post_item
+import re
 
 
 def planning_xml_contains_remove_signal(xml: Element) -> bool:
     """Returns ``True`` if the ``sttinstruct:remove`` signal is included, ``False`` otherwise"""
 
     namespaces = {"iptc": "http://iptc.org/std/nar/2006-10-01/"}
-    if xml.xpath("//iptc:itemMeta/iptc:signal[@qcode='sttinstruct:remove']", namespaces=namespaces):
+    if xml.xpath(  # truthy when at least one matching <signal> is found
+        "//iptc:itemMeta/iptc:signal[@qcode='sttinstruct:remove']",
+        namespaces=namespaces,  # binds the "iptc" prefix used in the expression
+    ):
         return True
     return False
 
 
 def unpost_or_spike_event_or_planning(item: Dict[str, Any]):
     item_resource = "events" if item.get(ITEM_TYPE) == "event" else "planning"
-    original: Dict[str, Any] = get_resource_service(item_resource).find_one(req=None, _id=item["guid"]) or {}
+    original: Dict[str, Any] = (  # looked up by guid; {} when nothing is found
+        get_resource_service(item_resource).find_one(req=None, _id=item["guid"]) or {}
+    )
 
     if not original.get("pubstatus") and original.get(ITEM_STATE) in [
         WORKFLOW_STATE.INGESTED,
         WORKFLOW_STATE.DRAFT,
         WORKFLOW_STATE.POSTPONED,
         WORKFLOW_STATE.CANCELLED,
     ]:
-        get_resource_service(item_resource + "_spike").patch(original[config.ID_FIELD], original)
+        get_resource_service(item_resource + "_spike").patch(  # no pubstatus yet
+            original[config.ID_FIELD], original
+        )
     elif original.get("pubstatus") != POST_STATE.CANCELLED:
-        update_post_item({"pubstatus": POST_STATE.CANCELLED, "_etag": original["_etag"]}, original)
+        update_post_item(  # NOTE(review): original == {} lands here -> KeyError "_etag"
+            {"pubstatus": POST_STATE.CANCELLED, "_etag": original["_etag"]}, original
+        )
+
+
+def transform_link_from_text(item, fields):
+    """Replace URL-like tokens in the given text fields of ``item`` with ``<a>`` tags.
+
+    Mutates ``item`` in place; missing, empty or non-``str`` fields are left untouched.
+    Scheme-less tokens get ``https://`` in the ``href``; URL text is HTML-escaped.
+    NOTE: any ``word.word`` token matches, so e-mail addresses and decimals get linked.
+    """
+    from html import escape  # matched tokens may contain quotes or angle brackets
+    url_pattern = re.compile(r"(https?://\S+|www\.\S+|\S+\.\S+)")
+
+    def replace(match):
+        url = match.group(0)
+        href = url if url.startswith(("http://", "https://")) else "https://" + url
+        return f'<a href="{escape(href)}">{escape(url)}</a>'
+
+    for field in fields:
+        if isinstance(item.get(field), str) and item[field]:
+            item[field] = url_pattern.sub(replace, item[field])
diff --git a/server/stt/stt_events_ml.py b/server/stt/stt_events_ml.py
@@ -11,7 +11,11 @@
 from superdesk.errors import SuperdeskApiError
 from planning.feed_parsers.events_ml import EventsMLParser
 
-from .common import planning_xml_contains_remove_signal, unpost_or_spike_event_or_planning
+from .common import (
+    planning_xml_contains_remove_signal,
+    unpost_or_spike_event_or_planning,
+    transform_link_from_text,
+)
 
 logger = logging.getLogger(__name__)
 TIMEZONE = "Europe/Helsinki"
@@ -27,7 +31,19 @@ def search_existing_contacts(contact: Dict[str, Any]) -> Optional[Dict[str, Any]
     contacts_service = get_resource_service("contacts")
     if len(contact.get("contact_email") or []):
         cursor = contacts_service.search(
-            {"query": {"bool": {"must": [{"term": {"contact_email.keyword": contact["contact_email"][0]}}]}}}
+            {
+                "query": {
+                    "bool": {
+                        "must": [
+                            {
+                                "term": {
+                                    "contact_email.keyword": contact["contact_email"][0]
+                                }
+                            }
+                        ]
+                    }
+                }
+            }
         )
         if cursor.count():
             return list(cursor)[0]
@@ -36,31 +52,33 @@ def search_existing_contacts(contact: Dict[str, Any]) -> Optional[Dict[str, Any]
         first_name = contact["first_name"].lower()
         last_name = contact["last_name"].lower()
 
-        cursor = contacts_service.search({
-            "query": {
-                "bool": {
-                    "must": [
-                        {
-                            "match": {
-                                "first_name": {
-                                    "query": first_name.lower(),
-                                    "operator": "AND",
+        cursor = contacts_service.search(
+            {
+                "query": {
+                    "bool": {
+                        "must": [
+                            {
+                                "match": {
+                                    "first_name": {
+                                        "query": first_name.lower(),
+                                        "operator": "AND",
+                                    },
                                 },
                             },
-                        },
-                        {
-                            "match": {
-                                "last_name": {
-                                    "query": last_name.lower(),
-                                    "operator": "AND",
+                            {
+                                "match": {
+                                    "last_name": {
+                                        "query": last_name.lower(),
+                                        "operator": "AND",
+                                    },
                                 },
                             },
-                        },
-                    ],
+                        ],
+                    },
                 },
-            },
-            "sort": ["_score"]
-        })
+                "sort": ["_score"],
+            }
+        )
         if cursor.count():
             return list(cursor)[0]
 
@@ -86,6 +104,20 @@ def parse(self, tree: Element, provider=None):
                 # If the item contains the ``sttinstruct:remove`` signal, no need to ingest this one
                 continue
             self.set_extra_fields(item, tree)
+
+            fields = (
+                "definition_long",
+                "definition_short",
+                "ednote",
+                "internal_note",
+                "name",
+                "slugline",
+                "registration_details",
+                "invitation_details",
+                "accreditation_info",
+            )
+            transform_link_from_text(item, fields)
+
             items_to_ingest.append(item)
 
         return items_to_ingest
@@ -139,17 +171,25 @@ def set_extra_fields(self, item, xml):
             if related is not None and related.get("rel", "") == "sttnat:sttEventType":
                 qcode_parts = related.get("qcode", "").split(":")
                 qcode = qcode_parts[1] if len(qcode_parts) == 2 else qcode_parts
-                qcode = f"type{qcode}"  # add prefix to avoid conflict with sttdepartment
-                name = self.getVocabulary("event_type", qcode, related.find(self.qname("name")).text)
-                item.setdefault("subject", []).append({
-                    "qcode": qcode,
-                    "name": name,
-                    "scheme": "event_type",
-                })
+                qcode = (
+                    f"type{qcode}"  # add prefix to avoid conflict with sttdepartment
+                )
+                name = self.getVocabulary(
+                    "event_type", qcode, related.find(self.qname("name")).text
+                )
+                item.setdefault("subject", []).append(
+                    {
+                        "qcode": qcode,
+                        "name": name,
+                        "scheme": "event_type",
+                    }
+                )
         except AttributeError:
             pass
 
-        self.set_location_details(item, event_details.find(self.qname("location")), location_notes)
+        self.set_location_details(
+            item, event_details.find(self.qname("location")), location_notes
+        )
         self.set_contact_details(item, event_details)
 
     def set_location_details(self, item, location_xml, notes):
@@ -195,13 +235,19 @@ def set_location_details(self, item, location_xml, notes):
             elif values[0] == "sttcountry":
                 location["address"]["extra"]["sttcountry"] = values[1]
                 try:
-                    location["address"]["country"] = broader.find(self.qname("name")).text
-                    location["address"]["extra"]["iso3166"] = broader.find(self.qname("sameAs")).get("qcode")
+                    location["address"]["country"] = broader.find(
+                        self.qname("name")
+                    ).text
+                    location["address"]["extra"]["iso3166"] = broader.find(
+                        self.qname("sameAs")
+                    ).get("qcode")
                 except AttributeError:
                     continue
 
         try:
-            address = location_xml.find(self.qname("POIDetails")).find(self.qname("address"))
+            address = location_xml.find(self.qname("POIDetails")).find(
+                self.qname("address")
+            )
         except AttributeError:
             address = None
 
@@ -212,7 +258,9 @@ def set_location_details(self, item, location_xml, notes):
                 pass
 
             try:
-                location["address"]["postal_code"] = address.find(self.qname("postalCode")).text
+                location["address"]["postal_code"] = address.find(
+                    self.qname("postalCode")
+                ).text
             except AttributeError:
                 pass
 
@@ -242,10 +290,12 @@ def set_contact_details(self, item: Dict[str, Any], event_details: Element):
         if job_title is not None and job_title.text:
             contact["job_title"] = job_title.text
         if phone is not None and phone.text:
-            contact["contact_phone"] = [{
-                "number": phone.text,
-                "public": True,
-            }]
+            contact["contact_phone"] = [
+                {
+                    "number": phone.text,
+                    "public": True,
+                }
+            ]
         if email is not None and email.text:
             contact["contact_email"] = [email.text.lower()]
         if web is not None and web.text:

diff --git a/server/stt/stt_planning_ml.py b/server/stt/stt_planning_ml.py
@@ -10,6 +10,7 @@
 from .common import (
     planning_xml_contains_remove_signal,
     unpost_or_spike_event_or_planning,
+    transform_link_from_text,
 )
 
 TIMEZONE = "Europe/Helsinki"
@@ -41,6 +42,10 @@ def parse(self, tree: Element, provider=None):
             ) if planning_item else self.set_placeholder_coverage(item, tree)
 
             self.set_extra_fields(item, tree)
+
+            fields = ("description_text", "headline", "slugline", "ednote", "abstract")
+            transform_link_from_text(item, fields)
+
             items_to_ingest.append(item)
 
         return items_to_ingest

diff --git a/server/tests/fixtures/planning_ml_link.xml b/server/tests/fixtures/planning_ml_link.xml
@@ -0,0 +1,75 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<planningItem xmlns="http://iptc.org/std/nar/2006-10-01/" xmlns:stt="http://www.stt-lehtikuva.fi/NewsML" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://iptc.org/std/nar/2006-10-01/ http://www.iptc.org/std/NewsML-G2/2.12/specification/NewsML-G2_2.12-spec-All-Power.xsd http://www.stt-lehtikuva.fi/NewsML http://www.stt-lehtikuva.fi/newsml/schema/STT-Lehtikuva_NewsML_G2.xsd" guid="urn:newsml:stt.fi:20280911:631023" version="1" standard="NewsML-G2" standardversion="2.12" conformance="power" xml:lang="fi">
+<catalogRef href="http://www.iptc.org/std/catalog/catalog.IPTC-G2-Standards_18.xml"/>
+<catalogRef href="http://www.stt-lehtikuva.fi/newsml/doc/stt-NewsCodesCatalog_1.xml"/>
+<itemMeta>
+<itemClass qcode="plinat:newscoverage"/>
+<provider literal="STT"/>
+<versionCreated>2023-10-02T12:23:48+02:00</versionCreated>
+<pubStatus qcode="stat:usable"/>
+<edNote role="sttdescription:additionalinfo">Testi: Lisätietoja aiheesta erityisesti media-asiakkaille
+
+www.stt.fi STT
+
+stt.fi
+
+https://stt.fi/
+
+STT</edNote>
+</itemMeta>
+<contentMeta>
+<urgency>1</urgency>
+<contentCreated>2023-10-02T12:23:48+02:00</contentCreated>
+<contentModified>2023-10-02T12:23:48+02:00</contentModified>
+<headline>Testi/Luotsi</headline>
+<description role="drol:summary">www.stt.fi 
+
+stt.fi 
+
+https://stt.fi/</description>
+<subject qcode="stt-topics:631023">
+<note role="sttpresent:1">Toimittaja paikalla</note>
+<related rel="sttrel:assigneddate" value="2028-09-11" valuedatatype="Date"/>
+</subject>
+<subject type="cpnat:department" qcode="sttdepartment:3">
+<name>Kotimaa</name>
+</subject>
+</contentMeta>
+<assert qcode="stt-topics:631023">
+<newsCoverageStatus qcode="ncostat:int"/>
+</assert>
+<newsCoverageSet>
+<newsCoverage id="ID_EVENT_286323" modified="2023-10-02T12:23:57+02:00">
+<planning>
+<g2contentType>application/vnd.iptc.g2.newsitem+xml</g2contentType>
+<itemClass qcode="ninat:text"/>
+<headline>Testi/Luotsi</headline>
+<description>www.stt.fi
+
+stt.fi
+
+https://stt.fi/</description>
+
+<subject type="cpnat:event" qcode="urn:newsml:stt.fi:20280911:286323">
+<name>Testi/Luotsi</name>
+</subject>
+</planning>
+</newsCoverage>
+<newsCoverage id="ID_WORKREQUEST_187845">
+<planning>
+<g2contentType>application/vnd.iptc.g2.newsitem+xml</g2contentType>
+<itemClass qcode="ninat:picture"/>
+<scheduled>2023-10-04T00:00:00+02:00</scheduled>
+<headline>Testi/Luotsi</headline>
+<subject type="ninat:text" qcode="urn:newsml:stt.fi:20280911000000:187845">
+<definition role="sttdescription:imagetarget">.</definition>
+<stt:workstartdate>2028-09-11T00:00:00+02:00</stt:workstartdate>
+</subject>
+<genre qcode="sttimage:20">
+<name>Kuvaaja paikalla</name>
+<definition role="sttdescription:imagetype">-</definition>
+</genre>
+</planning>
+</newsCoverage>
+</newsCoverageSet>
+</planningItem>
diff --git a/server/tests/stt_planning_ml_test.py b/server/tests/stt_planning_ml_test.py
@@ -167,3 +167,15 @@ def test_update_planning(self):
             "placeholder_urn:newsml:stt.fi:20230529:620121",
             dest["coverages"][0]["coverage_id"],
         )
+
+    def test_text_link(self):
+        """Links in parsed text fields are wrapped in ``<a>`` tags."""
+        self.fixture = "planning_ml_link.xml"
+        self.parse_source_content()
+
+        expected_by_field = {
+            "ednote": """Testi: Lisätietoja aiheesta erityisesti media-asiakkaille\n\n<a href="https://www.stt.fi">www.stt.fi</a> STT\n\n<a href="https://stt.fi">stt.fi</a>\n\n<a href="https://stt.fi/">https://stt.fi/</a>\n\nSTT""",  # noqa
+            "description_text": '<a href="https://www.stt.fi">www.stt.fi</a> \n\n<a href="https://stt.fi">stt.fi</a> \n\n<a href="https://stt.fi/">https://stt.fi/</a>',  # noqa
+        }
+        for field_name, expected_text in expected_by_field.items():
+            self.assertEqual(self.item[field_name], expected_text)