-
Notifications
You must be signed in to change notification settings - Fork 16
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
fix: removes artifact_type from artifacts, moves to a separate table (#…
…1543) * Previously, we were DISTINCT on source, namespace, name, AND type. * This means that the same address was shown multiple times per artifact_type * This refactor creates an intermediate model called int_all_artifacts that we can use * artifact_type is removed from artifacts_v1 and artifacts_by_project_v1 * there's a new int_artifact_types table that we can use to get types
- Loading branch information
Showing
13 changed files
with
251 additions
and
236 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
218 changes: 218 additions & 0 deletions
218
warehouse/dbt/models/intermediate/directory/int_all_artifacts.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,218 @@ | ||
{#
  This model generates the list of all artifacts associated with a project:
  repositories, npm packages, blockchain addresses, and contracts.

  Note: An artifact yields a separate row for each of its artifact_types; these
    rows are de-duplicated in int_artifacts_by_project.
  Note: Currently, the source and namespace for blockchain artifacts are the same.
    This may change in the future.
#}
|
||
with all_repos as (
  {#
    Repositories attached to each project. Only GitHub is handled for now;
    oss-directory needs refactoring before other repository providers can
    be supported.

    A URL listed on a project matches either a single repository URL or an
    owner URL (https://github.com/<owner>). In the latter case every known
    repository of that owner is attached to the project.
  #}
  select
    projects.project_id,
    CAST(repos.id as STRING) as artifact_source_id,
    "GITHUB" as artifact_source,
    "REPOSITORY" as artifact_type,
    repos.owner as artifact_namespace,
    repos.name as artifact_name,
    repos.url as artifact_url
  from {{ ref('stg_ossd__current_projects') }} as projects
  cross join UNNEST(JSON_QUERY_ARRAY(projects.github)) as github_entry
  inner join {{ ref('stg_ossd__current_repositories') }} as repos
    on
      LOWER(JSON_VALUE(github_entry.url)) in (
        LOWER(CONCAT("https://github.com/", repos.owner)),
        LOWER(repos.url)
      )
),
|
||
all_npm_raw as (
  {#
    One row per npm package URL listed on a project.

    artifact_name is the part of the URL that follows the ".../package/"
    prefix. The start position is derived from the prefix length instead of
    a hard-coded offset: the previous literal offset for the non-www form
    (28) was one past the correct position (the prefix is 26 characters, so
    the name starts at 27) and silently dropped the first character of the
    package name.

    URLs that match neither prefix get a NULL artifact_name.
    artifact_source_id and artifact_url are the URL itself and are not
    affected by the offset.
  #}
  select
    "NPM" as artifact_source,
    "PACKAGE" as artifact_type,
    projects.project_id,
    JSON_VALUE(npm.url) as artifact_source_id,
    case
      when
        JSON_VALUE(npm.url) like "https://npmjs.com/package/%"
        then
          SUBSTR(
            JSON_VALUE(npm.url),
            LENGTH("https://npmjs.com/package/") + 1
          )
      when
        JSON_VALUE(npm.url) like "https://www.npmjs.com/package/%"
        then
          SUBSTR(
            JSON_VALUE(npm.url),
            LENGTH("https://www.npmjs.com/package/") + 1
          )
    end as artifact_name,
    JSON_VALUE(npm.url) as artifact_url
  from
    {{ ref('stg_ossd__current_projects') }} as projects
  cross join
    UNNEST(JSON_QUERY_ARRAY(projects.npm)) as npm
),
|
||
all_npm as (
  {#
    Adds artifact_namespace to the raw npm rows: every "@" is removed from
    the package name and the text before the first "/" is kept. A name
    without "/" is therefore its own namespace; a NULL name gives a NULL
    namespace.
  #}
  select
    npm_raw.project_id,
    npm_raw.artifact_source_id,
    npm_raw.artifact_source,
    npm_raw.artifact_type,
    SPLIT(
      REPLACE(npm_raw.artifact_name, "@", ""), "/"
    )[SAFE_OFFSET(0)] as artifact_namespace,
    npm_raw.artifact_name,
    npm_raw.artifact_url
  from all_npm_raw as npm_raw
),
|
||
ossd_blockchain as (
  {#
    Blockchain addresses declared on a project, expanded to one row per
    (address, network, tag) combination. The network is used as both the
    artifact source and the artifact namespace; the tag becomes the
    artifact type. The address doubles as source id, name and url.
  #}
  select
    projects.project_id,
    JSON_VALUE(chain_entry.address) as artifact_source_id,
    network_name as artifact_source,
    tag_name as artifact_type,
    network_name as artifact_namespace,
    JSON_VALUE(chain_entry.address) as artifact_name,
    JSON_VALUE(chain_entry.address) as artifact_url
  from {{ ref('stg_ossd__current_projects') }} as projects
  cross join UNNEST(JSON_QUERY_ARRAY(projects.blockchain)) as chain_entry
  cross join UNNEST(JSON_VALUE_ARRAY(chain_entry.networks)) as network_name
  cross join UNNEST(JSON_VALUE_ARRAY(chain_entry.tags)) as tag_name
),
|
||
all_deployers as (
  {#
    Every known (deployer, contract) pair, labelled with the network it was
    observed on.

    NOTE(review): the two staging models are read with `*` and UNION ALL
    matches columns by position, so they presumably expose exactly
    (block_timestamp, transaction_hash, deployer_address, contract_address)
    in that order - confirm against the staging models.
  #}

  {# Ethereum mainnet deployers #}
  select
    *,
    "MAINNET" as artifact_namespace,
    "ETHEREUM" as artifact_source
  from {{ ref("stg_ethereum__deployers") }}

  union all

  {# Arbitrum One deployers #}
  select
    *,
    "ARBITRUM_ONE" as artifact_namespace,
    "ARBITRUM_ONE" as artifact_source
  from {{ ref("stg_arbitrum__deployers") }}

  union all

  {# Derived contracts, keyed by the address that deployed the contract #}
  select
    block_timestamp,
    transaction_hash,
    deployer_address,
    contract_address,
    UPPER(network) as artifact_namespace,
    UPPER(network) as artifact_source
  from {{ ref("int_derived_contracts") }}

  union all

  {# Derived contracts again, keyed by the factory deployer address #}
  select
    block_timestamp,
    transaction_hash,
    factory_deployer_address as deployer_address,
    contract_address,
    UPPER(network) as artifact_namespace,
    UPPER(network) as artifact_source
  from {{ ref("int_derived_contracts") }}
),
|
||
discovered_contracts as (
  {#
    Contracts attributed to a project because one of the project's declared
    addresses (tagged EOA, DEPLOYER or FACTORY) deployed them.

    A declared address matches deployments on its own network, or on every
    network when it is declared as ANY_EVM. The emitted source and namespace
    are the declared ones, not those of the deployment row.

    We currently do not really have a notion of namespace in oss-directory;
    the namespace predicate may need to change when that time comes.
  #}
  select
    declared.project_id,
    deployed.contract_address as artifact_source_id,
    declared.artifact_source,
    "CONTRACT" as artifact_type,
    declared.artifact_namespace,
    deployed.contract_address as artifact_name,
    deployed.contract_address as artifact_url
  from ossd_blockchain as declared
  inner join all_deployers as deployed
    on
      declared.artifact_source_id = deployed.deployer_address
      and UPPER(declared.artifact_type) in ("EOA", "DEPLOYER", "FACTORY")
      and UPPER(declared.artifact_source) in (
        UPPER(deployed.artifact_source), "ANY_EVM"
      )
      and UPPER(declared.artifact_namespace) in (
        UPPER(deployed.artifact_namespace), "ANY_EVM"
      )
),
|
||
all_artifacts as (
  {#
    Stacks every artifact CTE into a single relation with one shared column
    layout. The loop renders one identical select per CTE, joined with
    UNION ALL, in the order listed below.
  #}
  {% set artifact_ctes = [
    "all_repos",
    "ossd_blockchain",
    "discovered_contracts",
    "all_npm"
  ] %}
  {% for artifact_cte in artifact_ctes %}
    select
      project_id,
      artifact_source_id,
      artifact_source,
      artifact_type,
      artifact_namespace,
      artifact_name,
      artifact_url
    from {{ artifact_cte }}
    {% if not loop.last %}union all{% endif %}
  {% endfor %}
),
|
||
all_normalized_artifacts as (
  {#
    Case-normalizes every text column and removes exact duplicate rows.
    artifact_source and artifact_type are treated as internal constants and
    are upper-cased; all other text columns are lower-cased.
  #}
  select distinct
    unioned.project_id,
    LOWER(unioned.artifact_source_id) as artifact_source_id,
    UPPER(unioned.artifact_source) as artifact_source,
    UPPER(unioned.artifact_type) as artifact_type,
    LOWER(unioned.artifact_namespace) as artifact_namespace,
    LOWER(unioned.artifact_name) as artifact_name,
    LOWER(unioned.artifact_url) as artifact_url
  from all_artifacts as unioned
)
|
||
{#
  Final projection: the normalized artifact rows plus artifact_id, which the
  oso_id macro computes from artifact_source and artifact_source_id.
#}
select
  a.project_id,
  {{ oso_id("a.artifact_source", "a.artifact_source_id") }} as artifact_id,
  a.artifact_source_id,
  a.artifact_source,
  a.artifact_namespace,
  a.artifact_name,
  a.artifact_url,
  a.artifact_type
from all_normalized_artifacts as a
8 changes: 8 additions & 0 deletions
8
warehouse/dbt/models/intermediate/directory/int_artifact_types.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
{#
  Distinct artifact / artifact_type combinations taken from
  int_all_artifacts, independent of the project an artifact belongs to.
#}
select distinct
  artifacts.artifact_id,
  artifacts.artifact_source_id,
  artifacts.artifact_source,
  artifacts.artifact_namespace,
  artifacts.artifact_name,
  artifacts.artifact_type
from {{ ref('int_all_artifacts') }} as artifacts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.