diff --git a/ekg-mm/ekg-mm-vars.tex b/ekg-mm/ekg-mm-vars.tex index 7cf32855..23c738eb 100644 --- a/ekg-mm/ekg-mm-vars.tex +++ b/ekg-mm/ekg-mm-vars.tex @@ -34,7 +34,7 @@ \input{../../ekg-mm/fragments-capability-contribution-to-enterprise/#1.tex}% }{ \typeout{TODO: Missing ../../ekg-mm/fragments-capability-contribution-to-enterprise/#1.tex} - The \currentname capability contributes to the enterprise as follows... (todo, we welcome your input) + We welcome your input here: The \currentname{} capability contributes to the enterprise as follows... } } @@ -42,10 +42,9 @@ \ekgmmCapabilitySectionContributionToEKG% \IfFileExists{../../ekg-mm/fragments-capability-contribution-to-ekg/#1.tex}{ \input{../../ekg-mm/fragments-capability-contribution-to-ekg/#1.tex}% - }{ }{ \typeout{TODO: Missing ../../ekg-mm/fragments-capability-contribution-to-ekg/#1.tex} - The \currentname capability contributes to the \gls{ekg} as follows... (todo, we welcome your input) + We welcome your input here: The \currentname{} capability contributes to the \gls{ekg} as follows... } } diff --git a/ekg-mm/fragments-capability-contribution-to-ekg/a-1-1.tex b/ekg-mm/fragments-capability-contribution-to-ekg/a-1-1.tex index 18d61031..2700fa9a 100644 --- a/ekg-mm/fragments-capability-contribution-to-ekg/a-1-1.tex +++ b/ekg-mm/fragments-capability-contribution-to-ekg/a-1-1.tex @@ -1,4 +1,9 @@ % % A.1.1 Business Vision -- Contribution to the EKG % -We welcome your input here +Having a clearly communicated Business Vision will help set the direction for the discovery and selection of +use cases for the \gls{ekg}. + +At higher levels of maturity (level 3 and up) the Business Vision itself would be a key use case for the \gls{ekg}, +allowing any other part of the \gls{ekg} to be directly or indirectly relatable to the vision and goals, providing +insights into which components of the organization (people, systems, products etc.) are supporting the vision and how. 
diff --git a/ekg-mm/fragments-capability-contribution-to-ekg/a-1-2.tex b/ekg-mm/fragments-capability-contribution-to-ekg/a-1-2.tex index 9c7926e8..97c7e7f3 100644 --- a/ekg-mm/fragments-capability-contribution-to-ekg/a-1-2.tex +++ b/ekg-mm/fragments-capability-contribution-to-ekg/a-1-2.tex @@ -1,4 +1,10 @@ % % A.1.2 Business Goals - Contribution to the EKG % -We welcome your input here +Having clearly communicated, actionable and measurable Business Goals will help set the direction +for the discovery and selection of use cases for the \gls{ekg}. + +At higher levels of maturity (level 3 and up), all Business Goals, at any level of granularity and scope, +would end up being modelled and become part of the \gls{ekg}, directly or indirectly linked to every activity +in the organization, providing insights into cost, progress, effort and risks per stated goal. + diff --git a/ekg-mm/fragments-capability-contribution-to-ekg/a-1-3.tex b/ekg-mm/fragments-capability-contribution-to-ekg/a-1-3.tex index e5fc827d..2616a512 100644 --- a/ekg-mm/fragments-capability-contribution-to-ekg/a-1-3.tex +++ b/ekg-mm/fragments-capability-contribution-to-ekg/a-1-3.tex @@ -1,4 +1,5 @@ % % A.1.3 Business Tactics -- Contribution to the EKG % -We welcome your input here +We welcome your input here. +\todo[inline]{Create section contribution to the EKG for \currentname} diff --git a/ekg-mm/fragments-capability-summary/d-2-12.tex b/ekg-mm/fragments-capability-summary/d-2-12.tex index fd85094d..ee2546a8 100644 --- a/ekg-mm/fragments-capability-summary/d-2-12.tex +++ b/ekg-mm/fragments-capability-summary/d-2-12.tex @@ -1,25 +1,45 @@ % % D.2.12 Knowledge Graph Federation -- Summary % -\ifoptionfinal{ -\TODO[inline]{Create summary for \thesection} -}{ -Knowledge Graphs can be built from combined data sources from a queryable service layer and API. -Enterprises inherently have this need as their data footprints are vast, -and usually do not store their data in a single source instance. 
-Federation can be applied to the knowledge graph by leveraging linked data facilities. -Various query protocols such as the SPARQL query language has a SERVICE facility to combine remote endpoints, -and GraphQL can combine APIs. +An \glslocalreset{ekg}\gls{ekg} can be seen by end-users as ``one thing'', +a ``holistic'' collection of all connected data, similar to the web. + +However, as also specified in principle \Nameref{sec:ekg-principle-distributed}, the \gls{ekg} is distributed by +nature, assuming that it is not realistic in very large organizations or even eco-systems to have only one physical +implementation of a fully centralized \gls{ekg}. +That means that different parts of the \gls{ekg} are served by different installations or +deployments owned and controlled by different parts of the organization or even other organizations. + +Each deployment can be configured to connect to any number of ``backend'' data sources +(or destinations/sinks), some of which can be real triple stores (aka RDF Databases, quad stores or semantic graph databases) +and some of which can be relational databases, key/value stores or any other database technologies. +Or even just services with \glspl{api} that are used to get or store data. + +At higher levels of \gls{ekg} platform maturity, all access to the \gls{ekg} is provided via this service layer\,---\,generally +called the \gls{ekg:platform}\,---\,that takes care of federation of any request to any backend data source using any technology. + +All that technology is however hidden from the user. +In that sense an \gls{ekg:platform} is just a \gls{soa} layer\footnote{See \url{https://en.wikipedia.org/wiki/Service-oriented_architecture}}. +However, it is a fully model-driven \gls{soa} layer and one that works with all other known deployments of the \gls{ekg:platform}. 
+ +The federation facilities provided by the \gls{ekg:platform} are leveraging the principles of the linked data standard\cite{linked-data} +as originally defined by Sir Tim Berners-Lee\index{tim berners-lee} in 2005. +However, the original linked data standard does not provide many of the facilities that are required for mission-critical +enterprise use cases such as model-driven entitlement enforcement, automatic selection of the right version of the truth +for the given context and so forth. + +Various query protocols have built-in federation facilities, for instance the SPARQL\index{sparql} query language has a +facility\,---\,via the \lstinline|SERVICE| keyword\,---\,to federate a query across multiple remote endpoints and +the GraphQL\index{graphql} query language can combine \glspl{api} of multiple remote systems. Regardless of the query protocol used, federation provides a link that abstracts the underlying system in a way that seamlessly ties sources together. -A knowledge graph provides capabilities to federate queries on the backend. -If the knowledge graph is discoverable via a service or endpoint, + +An \gls{ekg:platform} provides capabilities to federate queries on the backend. + +If the \gls{ekg:platform} is discoverable by other services, \glspl{ekg} or browsers, a client can also federate remote knowledge graphs not only in server backends, but even in browser based faceted implementations. -Combining sources with federation is usually done in a select/read goal, but not create/update/delete. -No assumptions should be made when queries are performed via federation that they are done in a transacted manner. -An extension of federation is virtualization, which will be discussed in further section. +An extension of federation is \iindex{virtualization}, which will be discussed in a further section. Virtualization provides linked data transformation from a source not designed for knowledge graph 
-} \ No newline at end of file +in a materialized or ad-hoc manner using a mapping facility. \ No newline at end of file diff --git a/ekg-mm/fragments-capability-summary/d-2-13.tex b/ekg-mm/fragments-capability-summary/d-2-13.tex index 0c3ce400..d55edec9 100644 --- a/ekg-mm/fragments-capability-summary/d-2-13.tex +++ b/ekg-mm/fragments-capability-summary/d-2-13.tex @@ -1,16 +1,54 @@ % % D.2.13 Knowledge Graph Virtualization -- Summary % -\ifoptionfinal{ -\TODO[inline]{Create summary for \thesection} -}{ +(Work in progress) + Virtualization provides knowledge graphs the capability to source data not designed for linked data concepts like relational data sources. -By the use of RDF mapping languages relational data is mapped to knowledge graphs leveraging tools that -implement RML, R2RML, OBDA, YARRRML, and D2RQ specifications. + +It is one of the many techniques that are available in the arsenal of \gls{ekg} engineers to get data from a given +source to be "connected" to all other data in the \gls{ekg}. + +Generally these are the options at a high level: + +\begin{itemize} + \item The \gls{ekg} itself is the authoritative source of a given dataset. + \begin{itemize} + \item The dataset is complex + \begin{itemize} + \item Use a triple store (no virtualization needed) + \end{itemize} + \item The dataset has a very large volume (> hundreds of billions of facts) + \begin{itemize} + \item Use a specialized database type and apply virtualization. + \end{itemize} + \end{itemize} + \item The \gls{ekg} is not the authoritative source of a given dataset. + \begin{itemize} + \item The data is relatively clean and there is either a real-time requirement or a massive volume of data. + \begin{itemize} + \item Virtualization\,---\,as provided by various vendors like \stardog and \graphdb\,---\,may be a + good option because it gives an easy path from \iindex{SPARQL} to whichever backend database, + translating SPARQL "on-the-fly" to \iindex{SQL} or other query languages. 
+ \end{itemize} + \item None of the above: + \begin{itemize} + \item Process all data from the given source in one batch \gls{etl} pipeline, + store it in a triple store controlled by the \gls{ekg:platform} and serve it from there. + No virtualization needed, therefore more opportunities to enhance quality in the pipeline (since + virtualization often comes at the price of being less flexible in terms of available options to enhance + data quality). + \end{itemize} + \end{itemize} +\end{itemize} + +By the use of RDF mapping languages, relational data is mapped to semantic knowledge graphs leveraging tools that +implement RML\index{tool!RML}, R2RML\index{tool!R2RML}, OBDA\index{tool!OBDA}, YARRRML\index{tool!YARRRML}, +D2RQ\index{tool!D2RQ} and many vendor specifications. + Lossless conversion to an RDF data model can be achieved by defining terms to relational schema definitions of columns, type, and tables in mapping definitions. -Depending on the tool used to performe the virtualization, +Depending on the tool used to perform the virtualization, data can be materialized into RDF files and/or ad-hoc on demand. A knowledge graph virtualization tool that operates on relational data will execute a SQL query and convert the results to RDF. @@ -31,4 +69,4 @@ The less prescriptive approach is to allow data simply to be converted to an RDF model, later to be modified by SPARQL queries. The more prescriptive approach requires more diligence in model mapping. -} + diff --git a/ekg-mm/sections/b-3-delivery-management-3-dataops-process.tex b/ekg-mm/sections/b-3-delivery-management-3-dataops-process.tex index 5803939e..d0d231cd 100644 --- a/ekg-mm/sections/b-3-delivery-management-3-dataops-process.tex +++ b/ekg-mm/sections/b-3-delivery-management-3-dataops-process.tex @@ -2,5 +2,3 @@ % B.3.3 DataOps process % \ekgmmCapability{b-3-3}{dataops-process}{DataOps Process} - -We welcome your input here. 
diff --git a/latex-lib/acronyms.tex b/latex-lib/acronyms.tex index 54e5e295..1b497278 100644 --- a/latex-lib/acronyms.tex +++ b/latex-lib/acronyms.tex @@ -150,6 +150,7 @@ \newacronym{smart}{SMART}{Specific, Measurable, Achievable, Realistic, Timely} \newacronym{smcr}{SMCR}{Senior Managers Certification Regime} \newacronym{sme}{SME}{Subject Matter Expert} +\newacronym{soa}{SOA}{Service-Oriented Architecture} \newacronym{sow}{SoW}{Statement of Work} \newacronym{sparql}{SPARQL}{SPARQL Protocol and RDF Query Language} \newacronym{spa}{SPA}{Single-Page-Apps}