Skip to content

Latest commit

 

History

History
 
 

datahub-gms-graphql-service

Folders and files

NameName
Last commit message
Last commit date

parent directory

..
 
 
 
 
 
 

DataHub GMS GraphQL Service

DataHub GMS GraphQL Service wraps the Generalized Metadata Store (GMS) Rest.li calls in a GraphQL API.

Pre-requisites

  • You need to have JDK 8 installed on your machine to be able to build DataHub GMS GraphQL Service.

Build

To build DataHub GMS GraphQL Service, run:

./gradlew :datahub-gms-graphql-service:build

Dependencies

Before starting DataHub GMS GraphQL Service, you need to make sure that DataHub GMS is up and running.

Start via Docker image

The quickest way to try out DataHub GMS GraphQL Service is to run the Docker image.

Start via command line

If you modify things and want to try them out quickly without building the Docker image, you can also run the application directly from the command line after a successful build:

./gradlew :datahub-gms-graphql-service:bootRun

API Calls

In order to start using the GraphQL API, we recommend that you download GraphiQL.

Endpoint: http://localhost:8091/graphql

Sample API Calls

Query Dataset

Request:

{
  dataset(urn: "urn:li:dataset:(urn:li:dataPlatform:foo,bar,PROD)") {
    urn
    platform
    name
    origin
    description
    platformNativeType
    uri
    tags
    ownership {
      owners {
        owner {
          username
          urn
          info {
            displayName
            email
            fullName
            manager {
              urn
            }
          }
          editableInfo {
            aboutMe
            skills
          }
        }
        type
        source {
          url
        }
      }
      lastModified {
        actor
      }
    }
    created {
      actor
    }
    lastModified {
      actor
    }
  }
}

Sample Response:

{
  "data": {
    "dataset": {
      "urn": "urn:li:dataset:(urn:li:dataPlatform:foo,bar,PROD)",
      "platform": "urn:li:dataPlatform:foo",
      "name": "bar",
      "origin": "PROD",
      "description": "Sample Dataset",
      "platformNativeType": null,
      "uri": null,
      "tags": [
        "Datahub",
        "Sample"
      ],
      "ownership": {
        "owners": [
          {
            "owner": {
              "username": "fbar",
              "urn": "urn:li:corpuser:fbar",
              "info": {
                "displayName": "Foo Bar",
                "email": "fbar@linkedin.com",
                "fullName": "Foo Bar",
                "manager": {
                  "urn": "urn:li:corpuser:datahub"
                }
              },
              "editableInfo": {
                "aboutMe": "About Me",
                "skills": [
                  "Java",
                  "SQL"
                ]
              }
            },
            "type": "DATAOWNER",
            "source": null
          }
        ],
        "lastModified": {
          "actor": "urn:li:corpuser:fbar"
        }
      },
      "created": {
        "actor": "urn:li:corpuser:fbar"
      },
      "lastModified": {
        "actor": "urn:li:corpuser:fbar"
      }
    }
  }
}

Query MLModel

Sample Request:

{
  mlModel(urn: "urn:li:mlModel:(urn:li:dataPlatform:science,scienceModel,PROD)") {
    urn
    type
    name
    origin
    description
    tags
    ownership {
      owners {
        owner {
          urn
          username
          editableInfo {
            pictureLink
          }
          info {
            firstName
          }
        }
        type
        source {
          type
          url
        }
      }
    }
    properties {
      description
      date
      version
      type
      hyperParameters {
        key
        value {
          ...on StringBox {
            stringValue
          }
          ... on IntBox {
            intValue
          }
          ... on FloatBox {
            floatValue
          }
          ... on BooleanBox {
            booleanValue
          }
        }
      }
      mlFeatures
      tags
    }
    status {
      removed
    }
    institutionalMemory {
      elements {
        url
        description
        created {
          actor
        }
      }
    }
    intendedUse {
      primaryUses
      primaryUsers
      outOfScopeUses
    }
    factorPrompts {
      relevantFactors {
        groups
        instrumentation
        environment
      }
      evaluationFactors {
        groups
        instrumentation
        environment
      }
    }
    metrics {
      decisionThreshold
      performanceMeasures
    }
    trainingData {
      dataset
      motivation
      preProcessing
    }
    evaluationData {
      dataset
      motivation
      preProcessing
    }
    quantitativeAnalyses {
      unitaryResults {
        ...on StringBox {
          stringValue
        }
      }
      intersectionalResults {
        ...on StringBox {
          stringValue
        }
      }
    }
    ethicalConsiderations {
      useCases
      humanLife
      mitigations
      risksAndHarms
      data
    }
    caveatsAndRecommendations {
      caveats {
        caveatDescription
        needsFurtherTesting
        groupsNotRepresented
      }
      recommendations 
      idealDatasetCharacteristics
    }
    cost {
      costType
      costValue {
        costId
        costCode
      }
    }
  }
}

Sample Response:

{
  "data": {
    "mlModel": {
      "urn": "urn:li:mlModel:(urn:li:dataPlatform:science,scienceModel,PROD)",
      "type": "MLMODEL",
      "name": "scienceModel",
      "origin": "PROD",
      "description": "A sample model for predicting some outcome.",
      "tags": [
        "Sample"
      ],
      "ownership": {
        "owners": [
          {
            "owner": {
              "urn": "urn:li:corpuser:jdoe",
              "username": "jdoe",
              "editableInfo": null,
              "info": {
                "firstName": null
              }
            },
            "type": "DATAOWNER",
            "source": null
          },
          {
            "owner": {
              "urn": "urn:li:corpuser:datahub",
              "username": "datahub",
              "editableInfo": {
                "pictureLink": "https://raw.githubusercontent.com/linkedin/datahub/master/datahub-web/packages/data-portal/public/assets/images/default_avatar.png"
              },
              "info": {
                "firstName": null
              }
            },
            "type": "DATAOWNER",
            "source": null
          }
        ]
      },
      "properties": {
        "description": "A sample model for predicting some outcome.",
        "date": null,
        "version": null,
        "type": "Naive Bayes classifier",
        "hyperParameters": null,
        "mlFeatures": null,
        "tags": [
          "Sample"
        ]
      },
      "status": {
        "removed": false
      },
      "institutionalMemory": {
        "elements": [
          {
            "url": "https://www.linkedin.com",
            "description": "Sample doc",
            "created": {
              "actor": "urn:li:corpuser:jdoe"
            }
          }
        ]
      },
      "intendedUse": {
        "primaryUses": [
          "Sample Model",
          "Primary Use"
        ],
        "primaryUsers": [
          "ENTERPRISE"
        ],
        "outOfScopeUses": [
          "Production Deployment"
        ]
      },
      "factorPrompts": null,
      "metrics": {
        "decisionThreshold": [
          "decisionThreshold"
        ],
        "performanceMeasures": [
          "performanceMeasures"
        ]
      },
      "trainingData": [
        {
          "dataset": "urn:li:dataset:(urn:li:dataPlatform:hive,pageViewsHive,PROD)",
          "motivation": "For science!",
          "preProcessing": [
            "Aggregation"
          ]
        }
      ],
      "evaluationData": [
        {
          "dataset": "urn:li:dataset:(urn:li:dataPlatform:hive,pageViewsHive,PROD)",
          "motivation": null,
          "preProcessing": null
        }
      ],
      "quantitativeAnalyses": null,
      "ethicalConsiderations": {
        "useCases": [
          "useCases"
        ],
        "humanLife": [
          "humanLife"
        ],
        "mitigations": [
          "mitigations"
        ],
        "risksAndHarms": [
          "risksAndHarms"
        ],
        "data": [
          "data"
        ]
      },
      "caveatsAndRecommendations": {
        "caveats": null,
        "recommendations": "recommendations",
        "idealDatasetCharacteristics": [
          "idealDatasetCharacteristics"
        ]
      },
      "cost": {
        "costType": "ORG_COST_TYPE",
        "costValue": {
          "costId": null,
          "costCode": "costCode"
        }
      }
    }
  }
}

Query DataFlow

Request:

{
  dataFlow(urn: "urn:li:dataFlow:(airflow,flow1,foo)") {
    urn
    type
    orchestrator
    flowId
    info {
      name
      description
      project
    }
    ownership {
      owners {
        owner {
          username
          urn
          info {
            displayName
            email
            fullName
            manager {
              urn
            }
          }
          editableInfo {
            aboutMe
            skills
          }
        }
        type
        source {
          url
        }
      }
      lastModified {
        actor
      }
    }
  }
}

Sample Response:

{
  "data": {
    "dataFlow": {
      "urn": "urn:li:dataFlow:(airflow,flow1,foo)",
      "type": "DATA_FLOW",
      "orchestrator": "airflow",
      "flowId": "flow1",
      "info": {
        "name": "flow1",
        "description": "My own workflow",
        "project": "X"
      },
      "ownership": {
        "owners": [
          {
            "owner": {
              "username": "test-user",
              "urn": "urn:li:corpuser:test-user",
              "info": null,
              "editableInfo": null
            },
            "type": "DEVELOPER",
            "source": null
          }
        ],
        "lastModified": {
          "actor": "urn:li:corpuser:datahub"
        }
      }
    }
  }
}

Query DataJob

Request:

{
  dataJob(urn: "urn:li:dataJob:(urn:li:dataFlow:(airflow,flow1,foo),task1)") {
    urn
    type
    jobId
    dataFlow {
      urn
      flowId
    }
    inputOutput {
      inputDatasets {
        urn
        name
      }
      outputDatasets {
        urn
        name
      }
    }
  }
}

Sample Response:

{
  "data": {
    "dataJob": {
      "urn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,flow1,foo),task1)",
      "type": "DATA_JOB",
      "jobId": "task1",
      "dataFlow": {
        "urn": "urn:li:dataFlow:(airflow,flow1,foo)",
        "flowId": "flow1"
      },
      "inputOutput": {
        "inputDatasets": [
          {
            "urn": "urn:li:dataset:(urn:li:dataPlatform:redis,stuff,PROD)",
            "name": "stuff"
          }
        ],
        "outputDatasets": []
      }
    }
  }
}