From edc046501d01d82e66e44b64c48fa6451867240a Mon Sep 17 00:00:00 2001 From: Carl Cervone <42869436+ccerv1@users.noreply.github.com> Date: Tue, 26 Nov 2024 12:19:24 -0500 Subject: [PATCH] feat(docs): new use cases examples (#2522) * feat(docs): new use cases examples * chore(docs): bump sidebar indices --- apps/docs/docs/how-oso-works/_category_.json | 2 +- apps/docs/docs/references/_category_.json | 2 +- apps/docs/docs/subscribe.mdx | 2 +- apps/docs/docs/use-cases/collection-view.md | 265 ++++++++++++++++ apps/docs/docs/use-cases/index.md | 17 ++ apps/docs/docs/use-cases/project-deepdive.md | 305 +++++++++++++++++++ 6 files changed, 590 insertions(+), 3 deletions(-) create mode 100644 apps/docs/docs/use-cases/collection-view.md create mode 100644 apps/docs/docs/use-cases/index.md create mode 100644 apps/docs/docs/use-cases/project-deepdive.md diff --git a/apps/docs/docs/how-oso-works/_category_.json b/apps/docs/docs/how-oso-works/_category_.json index 168ea92b..3292e312 100644 --- a/apps/docs/docs/how-oso-works/_category_.json +++ b/apps/docs/docs/how-oso-works/_category_.json @@ -1,6 +1,6 @@ { "label": "Learn How OSO Works", - "position": 4, + "position": 5, "link": { "type": "doc", "id": "index" diff --git a/apps/docs/docs/references/_category_.json b/apps/docs/docs/references/_category_.json index a7733645..187be6fc 100644 --- a/apps/docs/docs/references/_category_.json +++ b/apps/docs/docs/references/_category_.json @@ -1,6 +1,6 @@ { "label": "References", - "position": 5, + "position": 6, "link": { "type": "doc", "id": "index" diff --git a/apps/docs/docs/subscribe.mdx b/apps/docs/docs/subscribe.mdx index d38c67e7..372ac10c 100644 --- a/apps/docs/docs/subscribe.mdx +++ b/apps/docs/docs/subscribe.mdx @@ -1,6 +1,6 @@ --- title: Subscribe For Updates -sidebar_position: 6 +sidebar_position: 9 --- We post updates about once a month on our Substack newsletter. 
diff --git a/apps/docs/docs/use-cases/collection-view.md b/apps/docs/docs/use-cases/collection-view.md new file mode 100644 index 00000000..389fb1df --- /dev/null +++ b/apps/docs/docs/use-cases/collection-view.md @@ -0,0 +1,265 @@ +--- +title: Analyze a Collection of Projects +sidebar_position: 1 +--- + +Get a high level view of key metrics for a collection of projects. New to OSO? Check out our [Getting Started guide](../get-started/index.mdx) to set up your BigQuery or API access. + +:::tip +All **collections** are defined as YAML files in [OSS Directory](https://github.com/opensource-observer/oss-directory). View our current collections [here](https://github.com/opensource-observer/oss-directory/tree/main/data/collections). +::: + +## BigQuery + +If you haven't already, then the first step is to subscribe to OSO public datasets in BigQuery. You can do this by clicking the "Subscribe" button on our [Datasets page](../integrate/overview/#oso-production-data-pipeline). + +The following queries should work if you copy-paste them into your [BigQuery console](https://console.cloud.google.com/bigquery). 
+ +### All collections + +Get the names of all collections on OSO: + +```sql +select + collection_name, + display_name +from `oso_production.collections_v1` +order by collection_name +``` + +### Projects in a collection + +Get the names of all projects in a collection: + +```sql +select + project_id, + project_name +from `oso_production.projects_by_collection_v1` +where collection_name = 'gg-01' +``` + +### Code metrics + +Get code metrics for all projects in a collection: + +```sql +select cm.* +from `oso_production.code_metrics_by_project_v1` as cm +join `oso_production.projects_by_collection_v1` as pbc + on cm.project_id = pbc.project_id +where pbc.collection_name = 'ethereum-crypto-ecosystems' +``` + +### Onchain metrics + +Get onchain metrics for all projects in a collection: + +```sql +select om.* +from `oso_production.onchain_metrics_by_project_v1` as om +join `oso_production.projects_by_collection_v1` as pbc + on om.project_id = pbc.project_id +where pbc.collection_name = 'optimism' +``` + +### Funding metrics + +Get funding metrics for all projects in a collection: + +```sql +select fm.* +from `oso_production.funding_metrics_by_project_v1` as fm +join `oso_production.projects_by_collection_v1` as pbc + on fm.project_id = pbc.project_id +where pbc.collection_name = 'op-rpgf3' +``` + +## GraphQL + +The following queries should work if you copy-paste them into our [GraphQL sandbox](https://www.opensource.observer/graphql). For more information on how to use the GraphQL API, check out our [GraphQL guide](../integrate/api.md). 
+ +### All collections + +Get the names of all collections on OSO: + +```graphql +query Collections { + oso_collectionsV1 { + collectionName + displayName + } +} +``` + +### Projects in a collection + +Get the names of all projects in a collection: + +```graphql +query ProjectsInCollection { + oso_projectsByCollectionV1(where: { collectionName: { _eq: "gg-01" } }) { + projectId + projectName + } +} +``` + +### Code metrics + +Get code metrics for all projects in a collection. This query returns two tables, `metrics` and `projects`, which can be joined client-side on `projectId`. + +```graphql +query CodeMetricsQuery { + metrics: oso_codeMetricsByProjectV1 { + projectId + starCount + forkCount + commitCount6Months + contributorCount6Months + } + projects: oso_projectsByCollectionV1( + where: { collectionName: { _eq: "ethereum-crypto-ecosystems" } } + ) { + projectId + } +} +``` + +### Onchain metrics + +Get onchain metrics for all projects in a collection. This query returns two tables, `metrics` and `projects`, which can be joined client-side on `projectId`. + +```graphql +query OnchainMetricsQuery { + metrics: oso_onchainMetricsByProjectV1 { + projectId + transactionCount6Months + gasFeesSum6Months + newAddressCount90Days + } + projects: oso_projectsByCollectionV1( + where: { collectionName: { _eq: "optimism" } } + ) { + projectId + } +} +``` + +### Funding metrics + +Get funding metrics for all projects in a collection. This query returns two tables, `metrics` and `projects`, which can be joined client-side on `projectId`. + +```graphql +query FundingMetricsQuery { + metrics: oso_fundingMetricsByProjectV1 { + projectId + totalFundingReceivedUsd6Months + } + projects: oso_projectsByCollectionV1( + where: { collectionName: { _eq: "op-rpgf3" } } + ) { + projectId + } +} +``` + +## Python + +See our guide on [writing Python notebooks](../integrate/python-notebooks.md) for more information on how to connect to BigQuery and query data. 
Our [Insights Repo](https://github.com/opensource-observer/insights) is full of examples too. + +### Connect to BigQuery + +You can use the following to connect to BigQuery: + +```python +from google.cloud import bigquery +import pandas as pd +import os + +os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = # PATH TO YOUR CREDENTIALS JSON +GCP_PROJECT = # YOUR GCP PROJECT NAME + +client = bigquery.Client(GCP_PROJECT) +``` + +### All collections + +Get the names of all collections on OSO: + +```python +query = """ + select + collection_name, + display_name + from `oso_production.collections_v1` + order by collection_name +""" +df = client.query(query).to_dataframe() +``` + +### Projects in a collection + +Get the names of all projects in a collection: + +```python +query = """ + select + project_id, + project_name + from `oso_production.projects_by_collection_v1` + where collection_name = 'gg-01' +""" +df = client.query(query).to_dataframe() +``` + +### Code metrics + +Get code metrics for all projects in a collection: + +```python +query = """ + select cm.* + from `oso_production.code_metrics_by_project_v1` as cm + join `oso_production.projects_by_collection_v1` as pbc + on cm.project_id = pbc.project_id + where pbc.collection_name = 'ethereum-crypto-ecosystems' +""" +df = client.query(query).to_dataframe() +``` + +### Onchain metrics + +Get onchain metrics for all projects in a collection: + +```python +query = """ + select om.* + from `oso_production.onchain_metrics_by_project_v1` as om + join `oso_production.projects_by_collection_v1` as pbc + on om.project_id = pbc.project_id + where pbc.collection_name = 'optimism' +""" +df = client.query(query).to_dataframe() +``` + +### Funding metrics + +Get funding metrics for all projects in a collection: + +```python +query = """ + select fm.* + from `oso_production.funding_metrics_by_project_v1` as fm + join `oso_production.projects_by_collection_v1` as pbc + on fm.project_id = pbc.project_id + where pbc.collection_name = 
'op-rpgf3' +""" +df = client.query(query).to_dataframe() +``` + +## Adding collections and projects + +Projects and collections are defined as YAML files in our [OSS Directory repo](https://github.com/opensource-observer/oss-directory). You can add or update your own collections and projects by submitting a pull request. + +For more information on how collections work, see our guide [here](../how-oso-works/oss-directory/collection.md). diff --git a/apps/docs/docs/use-cases/index.md b/apps/docs/docs/use-cases/index.md new file mode 100644 index 00000000..6d8329e5 --- /dev/null +++ b/apps/docs/docs/use-cases/index.md @@ -0,0 +1,17 @@ +--- +title: Use Case Examples +sidebar_position: 4 +--- + +# Use Case Examples + +Examples of applications and data science built on OSO's data platform. Each example includes code snippets you can copy and paste into your own projects. + +- 📊 [Collection View](./collection-view) - Get a high level view of key metrics for a collection of projects +- 🔬 [Project Deepdive](./project-deepdive) - Do a deep dive into a specific project +- 👥 Cohort Analysis (coming soon) - Track a cohort of projects across a set of metrics over time +- 💸 Retro Funding (coming soon) - Generate algorithms for rewarding contributions retroactively +- 🤝 Developer Retention (coming soon) - View developer churn and retention patterns over time +- 🕸️ Network Graphs (coming soon) - Visualize collaboration patterns and community connections +- ⭐ OpenRank (coming soon) - Run OpenRank on top of any network graph with your own trust seed assumptions +- 🛡️ Trusted Users (coming soon) - Classify users on the basis of different trust signals diff --git a/apps/docs/docs/use-cases/project-deepdive.md b/apps/docs/docs/use-cases/project-deepdive.md new file mode 100644 index 00000000..15f1cbca --- /dev/null +++ b/apps/docs/docs/use-cases/project-deepdive.md @@ -0,0 +1,305 @@ +--- +title: Deep Dive into a Project +sidebar_position: 2 +--- + +Analyze detailed metrics for a 
specific project. New to OSO? Check out our [Getting Started guide](../get-started/index.mdx) to set up your BigQuery or API access. + +:::tip +All **projects** are defined as YAML files in [OSS Directory](https://github.com/opensource-observer/oss-directory). View our current projects [here](https://github.com/opensource-observer/oss-directory/tree/main/data/projects). +::: + +## BigQuery + +If you haven't already, then the first step is to subscribe to OSO public datasets in BigQuery. You can do this by clicking the "Subscribe" button on our [Datasets page](../integrate/overview/#oso-production-data-pipeline). + +The following queries should work if you copy-paste them into your [BigQuery console](https://console.cloud.google.com/bigquery). + +### Find a project + +Search for projects by name: + +```sql +select + project_id, + project_name, + display_name +from `oso_production.projects_v1` +where lower(display_name) like lower('%merkle%') +``` + +### Find a project by artifact + +Find projects associated with specific artifacts (e.g., GitHub repositories, contracts): + +```sql +select + project_id, + project_name, + artifact_namespace as github_owner, + artifact_name as github_repo +from `oso_production.artifacts_by_project_v1` +where + artifact_source = 'GITHUB' + and artifact_namespace like '%uniswap%' +``` + +### Code metrics + +Get code metrics for a specific project: + +```sql +select + project_name, + display_name, + star_count, + fork_count, + commit_count_6_months, + contributor_count_6_months +from `oso_production.code_metrics_by_project_v1` +where project_name = 'opensource-observer' +``` + +### Timeseries metrics + +Get historical metrics for a project: + +```sql +select + tm.sample_date, + m.metric_name, + tm.amount +from `oso_production.timeseries_metrics_by_project_v0` as tm +join `oso_production.metrics_v0` as m + on tm.metric_id = m.metric_id +join `oso_production.projects_v1` as p + on tm.project_id = p.project_id +where p.project_name = 'wevm' 
+order by sample_date desc +``` + +### Code contributors + +Get all contributors to a project's GitHub repositories: + +```sql +select + te.time, + a.artifact_name as code_contributor, + abp.artifact_name as github_repo, + te.event_type, + te.amount +from `oso_production.timeseries_events_by_artifact_v0` as te +join `oso_production.artifacts_by_project_v1` as abp + on te.to_artifact_id = abp.artifact_id +join `oso_production.artifacts_v1` a + on te.from_artifact_id = a.artifact_id +where + abp.project_name = 'ipfs' + and te.event_type = 'COMMIT_CODE' +order by te.time desc +``` + +## GraphQL + +The following queries should work if you copy-paste them into our [GraphQL sandbox](https://www.opensource.observer/graphql). For more information on how to use the GraphQL API, check out our [GraphQL guide](../integrate/api.md). + +### Find a project + +Search for projects by name: + +```graphql +query FindProject { + oso_projectsV1(where: { displayName: { _ilike: "%ethereum%" } }) { + projectId + projectName + displayName + } +} +``` + +### Find a project by artifact + +Find projects associated with specific artifacts: + +```graphql +query FindProjectByArtifact { + oso_artifactsByProjectV1( + where: { + artifactNamespace: { _ilike: "%uniswap%" } + artifactSource: { _eq: "GITHUB" } + } + ) { + projectId + projectName + artifactNamespace + artifactName + } +} +``` + +### Code metrics + +Get code metrics for a specific project: + +```graphql +query CodeMetricsForProject { + oso_codeMetricsByProjectV1( + where: { projectName: { _eq: "opensource-observer" } } + ) { + projectName + displayName + starCount + forkCount + commitCount6Months + contributorCount6Months + } +} +``` + +### Timeseries metrics + +Get historical metrics for a project. This query returns two tables, `timeseriesMetrics` and `metrics`, which can be joined client-side on `metricId`. 
+ +```graphql +query TimeseriesMetrics { + timeseriesMetrics: oso_timeseriesMetricsByProjectV0( + where: { + projectId: { _eq: "Erx9J64anc8oSeN-wDKm0sojJf8ONrFVYbQ7GFnqSyc=" } + } + ) { + sampleDate + metricId + amount + } + metrics: oso_metricsV0 { + metricId + metricName + } +} +``` + +## Python + +See our guide on [writing Python notebooks](../integrate/python-notebooks.md) for more information on how to connect to BigQuery and query data. Our [Insights Repo](https://github.com/opensource-observer/insights) is full of examples too. + +### Connect to BigQuery + +You can use the following to connect to BigQuery: + +```python +from google.cloud import bigquery +import pandas as pd +import os + +os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = # PATH TO YOUR CREDENTIALS JSON +GCP_PROJECT = # YOUR GCP PROJECT NAME + +client = bigquery.Client(GCP_PROJECT) +``` + +### Find a project + +Search for projects by name: + +```python +query = """ + select + project_id, + project_name, + display_name + from `oso_production.projects_v1` + where lower(display_name) like lower('%merkle%') +""" +df = client.query(query).to_dataframe() +``` + +### Find a project by artifact + +Find projects associated with specific artifacts: + +```python +query = """ + select + project_id, + project_name, + artifact_namespace as github_owner, + artifact_name as github_repo + from `oso_production.artifacts_by_project_v1` + where + artifact_source = 'GITHUB' + and artifact_namespace like '%uniswap%' +""" +df = client.query(query).to_dataframe() +``` + +### Code metrics + +Get code metrics for a specific project: + +```python +query = """ + select + project_name, + display_name, + star_count, + fork_count, + commit_count_6_months, + contributor_count_6_months + from `oso_production.code_metrics_by_project_v1` + where project_name = 'opensource-observer' +""" +df = client.query(query).to_dataframe() +``` + +### Timeseries metrics + +Get historical metrics for a project: + +```python +query = """ + 
select + tm.sample_date, + m.metric_name, + tm.amount + from `oso_production.timeseries_metrics_by_project_v0` as tm + join `oso_production.metrics_v0` as m + on tm.metric_id = m.metric_id + join `oso_production.projects_v1` as p + on tm.project_id = p.project_id + where p.project_name = 'wevm' + order by sample_date desc +""" +df = client.query(query).to_dataframe() +``` + +### Code contributors + +Get all contributors to a project's GitHub repositories: + +```python +query = """ + select + te.time, + a.artifact_name as code_contributor, + abp.artifact_name as github_repo, + te.event_type, + te.amount + from `oso_production.timeseries_events_by_artifact_v0` as te + join `oso_production.artifacts_by_project_v1` as abp + on te.to_artifact_id = abp.artifact_id + join `oso_production.artifacts_v1` a + on te.from_artifact_id = a.artifact_id + where + abp.project_name = 'ipfs' + and te.event_type = 'COMMIT_CODE' + order by te.time desc +""" +df = client.query(query).to_dataframe() +``` + +## Adding projects + +Projects are defined as YAML files in our [OSS Directory repo](https://github.com/opensource-observer/oss-directory). You can add or update your own projects or project artifacts by submitting a pull request. + +For more information on how projects work, see our guide [here](../how-oso-works/oss-directory/project.md).