diff --git a/pums/education/est1.yaml b/pums/education/est1.yaml deleted file mode 100644 index 02e7cdb..0000000 --- a/pums/education/est1.yaml +++ /dev/null @@ -1,85 +0,0 @@ -default_values: &defaults - - - column: WAGP - name: avg_wage - apply_inflation: True - - - column: age - name: avg_age - - - column: WKHP - name: avg_hrs - -global: - seperator: "," - name: pums_beta - use_schema: True - source : "/tmp/pums_entire_usa///csv_pus.zip" - # source: "/tmp/test_.csv" - usecols: ['WKHP', 'FOD1P', 'RAC1P', 'ADJINC', 'ESR', 'PWGTP', 'SCHL', 'ST', 'WAGP', 'SOCP', 'NAICSP', 'SEX', 'AGEP', 'PUMA', 'pwgtp1', 'pwgtp2', 'pwgtp3', 'pwgtp4', 'pwgtp5', 'pwgtp6', 'pwgtp7', 'pwgtp8', 'pwgtp9', 'pwgtp10', 'pwgtp11', 'pwgtp12', 'pwgtp13', 'pwgtp14', 'pwgtp15', 'pwgtp16', 'pwgtp17', 'pwgtp18', 'pwgtp19', 'pwgtp20', 'pwgtp21', 'pwgtp22', 'pwgtp23', 'pwgtp24', 'pwgtp25', 'pwgtp26', 'pwgtp27', 'pwgtp28', 'pwgtp29', 'pwgtp30', 'pwgtp31', 'pwgtp32', 'pwgtp33', 'pwgtp34', 'pwgtp35', 'pwgtp36', 'pwgtp37', 'pwgtp38', 'pwgtp39', 'pwgtp40', 'pwgtp41', 'pwgtp42', 'pwgtp43', 'pwgtp44', 'pwgtp45', 'pwgtp46', 'pwgtp47', 'pwgtp48', 'pwgtp49', 'pwgtp50', 'pwgtp51', 'pwgtp52', 'pwgtp53', 'pwgtp54', 'pwgtp55', 'pwgtp56', 'pwgtp57', 'pwgtp58', 'pwgtp59', 'pwgtp60', 'pwgtp61', 'pwgtp62', 'pwgtp63', 'pwgtp64', 'pwgtp65', 'pwgtp66', 'pwgtp67', 'pwgtp68', 'pwgtp69', 'pwgtp70', 'pwgtp71', 'pwgtp72', 'pwgtp73', 'pwgtp74', 'pwgtp75', 'pwgtp76', 'pwgtp77', 'pwgtp78', 'pwgtp79', 'pwgtp80'] - source_vars: - year: [2013] - est: [1] - # post_agg_add_pklengths: True - - archive_files: "csv" - strip_column_whitespace: True - force_column_uppercase: - exclude: ["year", "est", "geo_id", "naics", "num_households", "num_ppl", "num_records"] - output : "/tmp/pums_output///" - web_paths: - pums: "http://ftp.census.gov/acs_yr/pums/csv_pus.zip" - rename: - AGEP: age - SEX: sex - RAC1P: race - FOD1P: major - SCHL: degree - type: - occp02: "str" - naicsp02: "str" - naicsp07: "str" - occp10 : "str" - ST: "str" - AGEP: "int" - - agg: "sum" - - import_to_db: True - db_settings: - user: postgres - password_env_var: DATAUSA_PW - host: 162.209.124.219 - db_name: datausa - type: - degree: text - deduplicate: True -tables: - yid: - pk: ["year", "naics", "degree"] - depths: - naics: [0, 1, 2] - rca: - index: ["year", "naics"] - column: "degree" - values: [num_ppl] - values: *defaults - - yod: - pk: ["year", "soc", "degree"] - depths: - soc: [0, 1, 2, 3] - rca: - index: ["year", "soc"] - column: "degree" - values: [num_ppl] - values: *defaults - - ymd: - pk: ["year", "major", "degree"] - rca: - index: ["year", "major"] - column: "degree" - values: [num_ppl] - values: *defaults - diff --git a/pums/geography/est1.yaml b/pums/geography/est1.yaml deleted file mode 100644 index c6f754a..0000000 --- a/pums/geography/est1.yaml +++ /dev/null @@ -1,119 +0,0 @@ -default_values: &defaults - - - column: WAGP - name: avg_wage - apply_inflation: True - - - column: age - name: avg_age - - - column: WKHP - name: avg_hrs - -global: - seperator: "," - name: pums_beta - use_schema: True - source : "/tmp/pums_entire_usa///csv_pus.zip" - # source: "/tmp/test_.csv" - usecols: ['POBP', 'WKHP', 'FOD1P', 'RAC1P', 'ADJINC', 'ESR', 'PWGTP', 'SCHL', 'ST', 'WAGP', 'SOCP', 'NAICSP', 'SEX', 'AGEP', 'PUMA', 'pwgtp1', 'pwgtp2', 'pwgtp3', 'pwgtp4', 'pwgtp5', 'pwgtp6', 'pwgtp7', 'pwgtp8', 'pwgtp9', 'pwgtp10', 'pwgtp11', 'pwgtp12', 'pwgtp13', 'pwgtp14', 'pwgtp15', 'pwgtp16', 'pwgtp17', 'pwgtp18', 'pwgtp19', 'pwgtp20', 'pwgtp21', 'pwgtp22', 'pwgtp23', 'pwgtp24', 'pwgtp25', 'pwgtp26', 'pwgtp27', 'pwgtp28', 'pwgtp29', 'pwgtp30', 'pwgtp31', 'pwgtp32', 'pwgtp33', 'pwgtp34', 'pwgtp35', 'pwgtp36', 'pwgtp37', 'pwgtp38', 'pwgtp39', 'pwgtp40', 'pwgtp41', 'pwgtp42', 'pwgtp43', 'pwgtp44', 'pwgtp45', 'pwgtp46', 'pwgtp47', 'pwgtp48', 'pwgtp49', 'pwgtp50', 'pwgtp51', 'pwgtp52', 'pwgtp53', 'pwgtp54', 'pwgtp55', 'pwgtp56', 'pwgtp57', 'pwgtp58', 'pwgtp59', 'pwgtp60', 'pwgtp61', 'pwgtp62', 'pwgtp63', 'pwgtp64', 'pwgtp65', 'pwgtp66', 'pwgtp67', 'pwgtp68', 'pwgtp69', 'pwgtp70', 'pwgtp71', 'pwgtp72', 'pwgtp73', 'pwgtp74', 'pwgtp75', 'pwgtp76', 'pwgtp77', 'pwgtp78', 'pwgtp79', 'pwgtp80'] - source_vars: - year: [2013] - est: [1] - geo_level: ["nation", "state", "puma"] - # post_agg_add_pklengths: True - - archive_files: "csv" - strip_column_whitespace: True - force_column_uppercase: - exclude: ["year", "est", "geo", "naics", "num_households", "num_ppl", "num_records"] - output : "/tmp/pums_output///" - web_paths: - pums: "http://ftp.census.gov/acs_yr/pums/csv_pus.zip" - rename: - AGEP: age - SEX: sex - RAC1P: race - FOD1P: major - SCHL: degree - POBP: birthplace - - type: - occp02: "str" - naicsp02: "str" - naicsp07: "str" - occp10 : "str" - ST: "str" - AGEP: "int" - - agg: "sum" - - import_to_db: True - db_settings: - user: postgres - password_env_var: DATAUSA_PW - host: 162.209.124.219 - db_name: datausa - type: - degree: text - deduplicate: True -tables: - ygd: - pk: ["year", "geo", "degree"] - rca: - index: ["year", "geo"] - column: "degree" - values: [num_ppl] - sumlevel: - values: *defaults - - ygs: - pk: ["year", "geo", "sex"] - sumlevel: - values: *defaults - - ygr: - pk: ["year", "geo", "race"] - sumlevel: - values: *defaults - - ygi: - pk: ["year", "geo", "naics"] - depths: - naics: [0, 1, 2] - rca: - index: ["year", "geo"] - column: "naics" - values: [num_ppl] - sumlevel: - values: *defaults - - ygio: - pk: ["year", "geo", "naics", "soc"] - depths: - soc: [0, 1, 2, 3] - naics: [0, 1, 2] - rca: - index: ["year", "geo", "naics"] - column: "soc" - values: [num_ppl] - sumlevel: - values: *defaults - - ygmd: - pk: ["year", "geo", "major", "degree"] - rca: - index: ["year", "geo", "major"] - column: "degree" - values: [num_ppl] - sumlevel: - values: *defaults - - ygb: - pk: ["year", "geo", "birthplace"] - rca: - index: ["year", "geo"] - column: "birthplace" - values: [num_ppl] - sumlevel: - values: *defaults diff --git a/pums/geography/est5.yaml b/pums/geography/est5.yaml deleted file mode 100644 index 2be6f2d..0000000 --- a/pums/geography/est5.yaml +++ /dev/null @@ -1,11 +0,0 @@ -inherits: est1.yaml - -global: - name: pums_5year - source_vars: - est: [5] - usecols: ['POBP05', 'POBP12', 'WKHP', 'ADJINC', 'ESR', 'PWGTP', 'SCHL', 'ST', 'WAGP', 'SEX', 'AGEP', 'PUMA00', 'PUMA10', 'PWGTP1', 'PWGTP2', 'PWGTP3', 'PWGTP4', 'PWGTP5', 'PWGTP6', 'PWGTP7', 'PWGTP8', 'PWGTP9', 'PWGTP10', 'PWGTP11', 'PWGTP12', 'PWGTP13', 'PWGTP14', 'PWGTP15', 'PWGTP16', 'PWGTP17', 'PWGTP18', 'PWGTP19', 'PWGTP20', 'PWGTP21', 'PWGTP22', 'PWGTP23', 'PWGTP24', 'PWGTP25', 'PWGTP26', 'PWGTP27', 'PWGTP28', 'PWGTP29', 'PWGTP30', 'PWGTP31', 'PWGTP32', 'PWGTP33', 'PWGTP34', 'PWGTP35', 'PWGTP36', 'PWGTP37', 'PWGTP38', 'PWGTP39', 'PWGTP40', 'PWGTP41', 'PWGTP42', 'PWGTP43', 'PWGTP44', 'PWGTP45', 'PWGTP46', 'PWGTP47', 'PWGTP48', 'PWGTP49', 'PWGTP50', 'PWGTP51', 'PWGTP52', 'PWGTP53', 'PWGTP54', 'PWGTP55', 'PWGTP56', 'PWGTP57', 'PWGTP58', 'PWGTP59', 'PWGTP60', 'PWGTP61', 'PWGTP62', 'PWGTP63', 'PWGTP64', 'PWGTP65', 'PWGTP66', 'PWGTP67', 'PWGTP68', 'PWGTP69', 'PWGTP70', 'PWGTP71', 'PWGTP72', 'PWGTP73', 'PWGTP74', 'PWGTP75', 'PWGTP76', 'PWGTP77', 'PWGTP78', 'PWGTP79', 'PWGTP80'] - - source : "$HOME/pums_entire_usa///csv_pus.zip" - output : "$HOME/jspeiser/pums_output///" - diff --git a/pums/geography/pr_est1.yaml b/pums/geography/pr_est1.yaml deleted file mode 100644 index e7f8b74..0000000 --- a/pums/geography/pr_est1.yaml +++ /dev/null @@ -1,139 +0,0 @@ -default_values: &defaults - - - column: WAGP - name: avg_wage - apply_inflation: True - - - column: age - name: avg_age - - - column: WKHP - name: avg_hrs - -global: - seperator: "," - name: pums_beta - use_schema: True - source : "/tmp/pums_pr///csv_ppr.zip" - # source: "/tmp/test_.csv" - usecols: ['POBP', 'WKHP', 'FOD1P', 'RAC1P', 'ADJINC', 'ESR', 'PWGTP', 'SCHL', 'ST', 'WAGP', 'SOCP', 'NAICSP', 'SEX', 'AGEP', 'PUMA', 'pwgtp1', 'pwgtp2', 'pwgtp3', 'pwgtp4', 'pwgtp5', 'pwgtp6', 'pwgtp7', 'pwgtp8', 'pwgtp9', 'pwgtp10', 'pwgtp11', 'pwgtp12', 'pwgtp13', 'pwgtp14', 'pwgtp15', 'pwgtp16', 'pwgtp17', 'pwgtp18', 'pwgtp19', 'pwgtp20', 'pwgtp21', 'pwgtp22', 'pwgtp23', 'pwgtp24', 'pwgtp25', 'pwgtp26', 'pwgtp27', 'pwgtp28', 'pwgtp29', 'pwgtp30', 'pwgtp31', 'pwgtp32', 'pwgtp33', 'pwgtp34', 'pwgtp35', 'pwgtp36', 'pwgtp37', 'pwgtp38', 'pwgtp39', 'pwgtp40', 'pwgtp41', 'pwgtp42', 'pwgtp43', 'pwgtp44', 'pwgtp45', 'pwgtp46', 'pwgtp47', 'pwgtp48', 'pwgtp49', 'pwgtp50', 'pwgtp51', 'pwgtp52', 'pwgtp53', 'pwgtp54', 'pwgtp55', 'pwgtp56', 'pwgtp57', 'pwgtp58', 'pwgtp59', 'pwgtp60', 'pwgtp61', 'pwgtp62', 'pwgtp63', 'pwgtp64', 'pwgtp65', 'pwgtp66', 'pwgtp67', 'pwgtp68', 'pwgtp69', 'pwgtp70', 'pwgtp71', 'pwgtp72', 'pwgtp73', 'pwgtp74', 'pwgtp75', 'pwgtp76', 'pwgtp77', 'pwgtp78', 'pwgtp79', 'pwgtp80'] - source_vars: - year: [2013] - est: [1] - geo_level: ["state", "puma"] - # post_agg_add_pklengths: True - - archive_files: "csv" - strip_column_whitespace: True - force_column_uppercase: - exclude: ["year", "est", "geo", "naics", "num_households", "num_ppl", "num_records"] - output : "/tmp/pums_output/ppr///" - web_paths: - pums: "http://ftp.census.gov/acs_yr/pums/csv_ppr.zip" - rename: - AGEP: age - SEX: sex - RAC1P: race - FOD1P: major - SCHL: degree - POBP: birthplace - - type: - occp02: "str" - naicsp02: "str" - naicsp07: "str" - occp10 : "str" - ST: "str" - AGEP: "int" - - agg: "sum" - - import_to_db: True - db_settings: - user: postgres - password_env_var: DATAUSA_PW - host: 162.209.124.219 - db_name: datausa - type: - degree: text - deduplicate: True -tables: - yg: - pk: ["year", "geo"] - sumlevel: - values: *defaults - gini: True - - ygd: - pk: ["year", "geo", "degree"] - rca: - index: ["year", "geo"] - column: "degree" - values: [num_ppl] - sumlevel: - values: *defaults - - ygs: - pk: ["year", "geo", "sex"] - sumlevel: - values: *defaults - - ygr: - pk: ["year", "geo", "race"] - sumlevel: - values: *defaults - - ygi: - pk: ["year", "geo", "naics"] - depths: - naics: [0, 1, 2] - rca: - index: ["year", "geo"] - column: "naics" - values: [num_ppl] - sumlevel: - values: *defaults - - ygio: - pk: ["year", "geo", "naics", "soc"] - depths: - soc: [0, 1, 2, 3] - naics: [0, 1, 2] - rca: - index: ["year", "geo", "naics"] - column: "soc" - values: [num_ppl] - sumlevel: - values: *defaults - - ygmd: - pk: ["year", "geo", "major", "degree"] - rca: - index: ["year", "geo", "major"] - column: "degree" - values: [num_ppl] - sumlevel: - values: *defaults - - ygb: - pk: ["year", "geo", "birthplace"] - rca: - index: ["year", "geo"] - column: "birthplace" - values: [num_ppl] - sumlevel: - values: *defaults - - ygor: - pk: ["year", "geo", "soc", "race"] - depths: - soc: [0, 1, 2, 3] - sumlevel: - values: *defaults - - ygos: - pk: ["year", "geo", "soc", "sex"] - depths: - soc: [0, 1, 2, 3] - sumlevel: - values: *defaults diff --git a/pums/household/household.yaml b/pums/household/household.yaml deleted file mode 100644 index bd2f4ec..0000000 --- a/pums/household/household.yaml +++ /dev/null @@ -1,101 +0,0 @@ -global: - seperator: "," - name: pums - use_schema: True - source : "/tmp/pums///csv_hus.zip" - source_vars: - year: - start: 2005 - end: 2013 - est: [1] - archive_files: "csv" - strip_column_whitespace: True - force_column_uppercase: - exclude: ["year", "est", "geo_id"] - output : "/tmp/pums_house_output///" - web_paths: - "2005": "http://www2.census.gov/acs/downloads/pums//csv_hus.zip" - "2006": "http://www2.census.gov/acs/downloads/pums//csv_hus.zip" - pums: "ftp://ftp.census.gov/acs_yr/pums/csv_hus.zip" - type: - ST: "str" - - agg: "sum" - - import_to_db: True - db_settings: - user: postgres - password_env_var: DATAUSA_PW - host: 162.209.124.219 - db_name: datausa - - transform: - df: - type: frame_func - func: "plugins.census.pums.process" - sumlevel: "puma" - mode: household - - rename: - TAXP: property_tax - TEN: tenure - VAL: property_value - VALP: property_value - VEH: num_vehicles - - named_transforms: - pums_state: - type: frame_func - func: "plugins.census.pums.process" - sumlevel: "state" - mode: household - pums_nation: - type: frame_func - func: "plugins.census.pums.process" - sumlevel: "nation" - mode: household - -tables: - puma_yeg_tenure: - pk: ["year", "est", "geo_id", "tenure"] - state_yeg_tenure: - pk: ["year", "est", "geo_id", "tenure"] - transform: - df: pums_state - nation_yeg_tenure: - pk: ["year", "geo_id", "tenure"] - transform: - df: pums_nation - - puma_yeg_ptax: - pk: ["year", "est", "geo_id", "property_tax"] - state_yeg_ptax: - pk: ["year", "est", "geo_id", "property_tax"] - transform: - df: pums_state - nation_yeg_ptax: - pk: ["year", "est", "geo_id", "property_tax"] - transform: - df: pums_nation - - puma_yeg_pval: - pk: ["year", "est", "geo_id", "property_value"] - state_yeg_pval: - pk: ["year", "est", "geo_id", "property_value"] - transform: - df: pums_state - nation_yeg_pval: - pk: ["year", "est", "geo_id", "property_value"] - transform: - df: pums_nation - - puma_yeg_vehicles: - pk: ["year", "est", "geo_id", "num_vehicles"] - state_yeg_vehicles: - pk: ["year", "est", "geo_id", "num_vehicles"] - transform: - df: pums_state - nation_yeg_vehicles: - pk: ["year", "est", "geo_id", "num_vehicles"] - transform: - df: pums_nation diff --git a/pums/household/internet.yaml b/pums/household/internet.yaml deleted file mode 100644 index 29c1fe7..0000000 --- a/pums/household/internet.yaml +++ /dev/null @@ -1,62 +0,0 @@ -global: - seperator: "," - name: pums - use_schema: True - source : "/tmp/pums///csv_hus.zip" - source_vars: - year: - start: 2013 - end: 2013 - est: [1] - archive_files: "csv" - strip_column_whitespace: True - force_column_uppercase: - exclude: ["year", "est", "geo_id"] - output : "/tmp/pums_house_output///" - web_paths: - pums: "ftp://ftp.census.gov/acs_yr/pums/csv_hus.zip" - type: - ST: "str" - - agg: "sum" - - import_to_db: True - db_settings: - user: postgres - password_env_var: DATAUSA_PW - host: 162.209.124.219 - db_name: datausa - - transform: - df: - type: frame_func - func: "plugins.census.pums.process" - sumlevel: "puma" - mode: household - - rename: - ACCESS: internet_access - - named_transforms: - pums_state: - type: frame_func - func: "plugins.census.pums.process" - sumlevel: "state" - mode: household - pums_nation: - type: frame_func - func: "plugins.census.pums.process" - sumlevel: "nation" - mode: household - -tables: - puma_yg_internet: - pk: ["year", "geo_id", "internet_access"] - state_yg_internet: - pk: ["year", "geo_id", "internet_access"] - transform: - df: pums_state - nation_yg_internet: - pk: ["year", "geo_id", "internet_access"] - transform: - df: pums_nation diff --git a/pums/industry/est1.yaml b/pums/industry/est1.yaml deleted file mode 100644 index 5f88a8f..0000000 --- a/pums/industry/est1.yaml +++ /dev/null @@ -1,156 +0,0 @@ -default_values: &defaults - - - column: WAGP - name: avg_wage - apply_inflation: True - - - column: age - name: avg_age - - - column: WKHP - name: avg_hrs - -global: - seperator: "," - name: pums_beta - use_schema: True - source : "/tmp/pums_entire_usa///csv_pus.zip" - # source: "/tmp/test_.csv" - usecols: ['WKHP', 'FOD1P', 'RAC1P', 'ADJINC', 'ESR', 'PWGTP', 'SCHL', 'ST', 'WAGP', 'SOCP', 'NAICSP', 'SEX', 'AGEP', 'PUMA', 'pwgtp1', 'pwgtp2', 'pwgtp3', 'pwgtp4', 'pwgtp5', 'pwgtp6', 'pwgtp7', 'pwgtp8', 'pwgtp9', 'pwgtp10', 'pwgtp11', 'pwgtp12', 'pwgtp13', 'pwgtp14', 'pwgtp15', 'pwgtp16', 'pwgtp17', 'pwgtp18', 'pwgtp19', 'pwgtp20', 'pwgtp21', 'pwgtp22', 'pwgtp23', 'pwgtp24', 'pwgtp25', 'pwgtp26', 'pwgtp27', 'pwgtp28', 'pwgtp29', 'pwgtp30', 'pwgtp31', 'pwgtp32', 'pwgtp33', 'pwgtp34', 'pwgtp35', 'pwgtp36', 'pwgtp37', 'pwgtp38', 'pwgtp39', 'pwgtp40', 'pwgtp41', 'pwgtp42', 'pwgtp43', 'pwgtp44', 'pwgtp45', 'pwgtp46', 'pwgtp47', 'pwgtp48', 'pwgtp49', 'pwgtp50', 'pwgtp51', 'pwgtp52', 'pwgtp53', 'pwgtp54', 'pwgtp55', 'pwgtp56', 'pwgtp57', 'pwgtp58', 'pwgtp59', 'pwgtp60', 'pwgtp61', 'pwgtp62', 'pwgtp63', 'pwgtp64', 'pwgtp65', 'pwgtp66', 'pwgtp67', 'pwgtp68', 'pwgtp69', 'pwgtp70', 'pwgtp71', 'pwgtp72', 'pwgtp73', 'pwgtp74', 'pwgtp75', 'pwgtp76', 'pwgtp77', 'pwgtp78', 'pwgtp79', 'pwgtp80'] - source_vars: - year: [2013] - est: [1] - - archive_files: "csv" - strip_column_whitespace: True - force_column_uppercase: - exclude: ["year", "est", "geo_id", "naics", "num_households", "num_ppl", "num_records"] - output : "/tmp/pums_output///" - web_paths: - pums: "http://ftp.census.gov/acs_yr/pums/csv_pus.zip" - rename: - AGEP: age - SEX: sex - RAC1P: race - FOD1P: major - SCHL: degree - type: - occp02: "str" - naicsp02: "str" - naicsp07: "str" - occp10 : "str" - ST: "str" - AGEP: "int" - - agg: "sum" - - import_to_db: True - db_settings: - user: postgres - password_env_var: DATAUSA_PW - host: 162.209.124.219 - db_name: datausa - type: - degree: text - deduplicate: True -tables: - yi: - pk: ["year", "naics"] - depths: - naics: [0, 1, 2] - values: *defaults - gini: True - post_agg_transform: - - column: avg_wage_rank - type: rank - target: avg_wage - where: - column: num_records - func: gt - value: 5 - - column: num_ppl_rank - type: rank - target: num_ppl - where: - column: num_records - func: gt - value: 5 - - column: avg_age_rank - type: rank - target: avg_age - where: - column: num_records - func: gt - value: 5 - yid: - pk: ["year", "naics", "degree"] - depths: - naics: [0, 1, 2] - rca: - index: ["year", "naics"] - column: "degree" - values: [num_ppl] - values: *defaults - - yim: - pk: ["year", "naics", "major"] - depths: - naics: [0, 1, 2] - rca: - index: ["year", "naics"] - column: "major" - values: [num_ppl] - values: *defaults - - yio: - pk: ["year", "naics", "soc"] - depths: - soc: [0, 1, 2, 3] - naics: [0, 1, 2] - rca: - index: ["year", "naics"] - column: "soc" - values: [num_ppl] - values: *defaults - - yior: - pk: ["year", "naics", "soc", "race"] - depths: - soc: [0, 1, 2, 3] - naics: [0, 1, 2] - values: *defaults - - yios: - pk: ["year", "naics", "soc", "sex"] - depths: - soc: [0, 1, 2, 3] - naics: [0, 1, 2] - values: *defaults - - yiw: - pk: ["year", "naics", "wage_bin"] - depths: - naics: [0, 1, 2] - wage_bin: True - values: *defaults - - yis: - pk: ["year", "naics", "sex"] - depths: - naics: [0, 1, 2] - values: *defaults - rca: - index: ["year", "naics"] - column: "sex" - values: [avg_wage] - - yir: - pk: ["year", "naics", "race"] - depths: - naics: [0, 1, 2] - values: *defaults - rca: - index: ["year", "naics"] - column: "race" - values: [avg_wage] - diff --git a/pums/majors/est1.yaml b/pums/majors/est1.yaml deleted file mode 100644 index 6b01a7d..0000000 --- a/pums/majors/est1.yaml +++ /dev/null @@ -1,190 +0,0 @@ -default_values: &defaults - - - column: WAGP - name: avg_wage - apply_inflation: True - - - column: age - name: avg_age - - - column: WKHP - name: avg_hrs - -global: - seperator: "," - name: pums_beta - use_schema: True - source : "/tmp/pums_entire_usa///csv_pus.zip" - # source: "/tmp/test_.csv" - usecols: ['NATIVITY', 'POBP', 'WKHP', 'FOD1P', 'RAC1P', 'ADJINC', 'ESR', 'PWGTP', 'SCHL', 'ST', 'WAGP', 'SOCP', 'NAICSP', 'SEX', 'AGEP', 'PUMA', 'pwgtp1', 'pwgtp2', 'pwgtp3', 'pwgtp4', 'pwgtp5', 'pwgtp6', 'pwgtp7', 'pwgtp8', 'pwgtp9', 'pwgtp10', 'pwgtp11', 'pwgtp12', 'pwgtp13', 'pwgtp14', 'pwgtp15', 'pwgtp16', 'pwgtp17', 'pwgtp18', 'pwgtp19', 'pwgtp20', 'pwgtp21', 'pwgtp22', 'pwgtp23', 'pwgtp24', 'pwgtp25', 'pwgtp26', 'pwgtp27', 'pwgtp28', 'pwgtp29', 'pwgtp30', 'pwgtp31', 'pwgtp32', 'pwgtp33', 'pwgtp34', 'pwgtp35', 'pwgtp36', 'pwgtp37', 'pwgtp38', 'pwgtp39', 'pwgtp40', 'pwgtp41', 'pwgtp42', 'pwgtp43', 'pwgtp44', 'pwgtp45', 'pwgtp46', 'pwgtp47', 'pwgtp48', 'pwgtp49', 'pwgtp50', 'pwgtp51', 'pwgtp52', 'pwgtp53', 'pwgtp54', 'pwgtp55', 'pwgtp56', 'pwgtp57', 'pwgtp58', 'pwgtp59', 'pwgtp60', 'pwgtp61', 'pwgtp62', 'pwgtp63', 'pwgtp64', 'pwgtp65', 'pwgtp66', 'pwgtp67', 'pwgtp68', 'pwgtp69', 'pwgtp70', 'pwgtp71', 'pwgtp72', 'pwgtp73', 'pwgtp74', 'pwgtp75', 'pwgtp76', 'pwgtp77', 'pwgtp78', 'pwgtp79', 'pwgtp80'] - source_vars: - year: [2013] - est: [1] - #geo_level: ["nation", "state", "puma"] - # post_agg_add_pklengths: True - - archive_files: "csv" - strip_column_whitespace: True - force_column_uppercase: - exclude: ["year", "est", "geo_id", "naics", "num_households", "num_ppl", "num_records", "country", "native"] - output : "/tmp/pums_output///" - web_paths: - pums: "http://ftp.census.gov/acs_yr/pums/csv_pus.zip" - rename: - AGEP: age - SEX: sex - RAC1P: race - FOD1P: cip - SCHL: degree - NATIVITY: native - POBP: birthplace - - type: - occp02: "str" - naicsp02: "str" - naicsp07: "str" - occp10 : "str" - ST: "str" - AGEP: "int" - FOD1P: str - - agg: "sum" - - import_to_db: True - db_settings: - user: postgres - password_env_var: DATAUSA_PW - host: 162.209.124.219 - db_name: datausa - type: - degree: text - deduplicate: True - -tables: - yca: - pk: ["year", "cip", "age"] - rca: - index: ["year", "cip"] - column: "age" - values: [num_ppl] - values: *defaults - - ycb: - pk: ["year", "cip", "birthplace"] - rca: - index: ["year", "cip"] - column: "birthplace" - values: [num_ppl] - values: *defaults - - ycd: - pk: ["year", "cip", "degree"] - rca: - index: ["year", "cip"] - column: "degree" - values: [num_ppl] - values: *defaults - - ycgb: - pk: ["year", "cip", "geo_id", "birthplace"] - sumlevel: state - rca: - index: ["year", "cip", "geo_id"] - column: "birthplace" - values: [num_ppl] - values: *defaults - - ycn: - pk: ["year", "cip", "native"] - rca: - index: ["year", "cip"] - column: "native" - values: [num_ppl] - values: *defaults - - yoc: - pk: ["year", "soc", "cip"] - depths: - soc: [0, 1, 2, 3] - rca: - index: ["year", "soc"] - column: "cip" - values: [num_ppl] - values: *defaults - - ycr: - pk: ["year", "cip", "race"] - rca: - index: ["year", "cip"] - column: "race" - values: [num_ppl] - values: *defaults - - ycs: - pk: ["year", "cip", "sex"] - rca: - index: ["year", "cip"] - column: "sex" - values: [num_ppl] - values: *defaults - - ycsr: - pk: ["year", "cip", "sex", "race"] - rca: - index: ["year", "cip", "sex"] - column: "race" - values: [num_ppl] - values: *defaults - - yc: - pk: ["year", "cip"] - values: *defaults - post_agg_transform: - - - column: avg_wage_rank - type: rank - target: avg_wage - where: - column: num_records - func: gt - value: 5 - ygc: - table_vars: - geo_level: ["nation", "state", "puma"] - pk: ["year", "geo_id", "cip"] - rca: - index: ["year", "geo_id"] - column: "cip" - values: [num_ppl] - sumlevel: - - ygcd: - pk: ["year", "geo_id", "cip", "degree"] - table_vars: - geo_level: ["nation", "state", "puma"] - rca: - index: ["year", "geo_id", "cip"] - column: "degree" - values: [num_ppl] - sumlevel: - values: *defaults - - ygc: - pk: ["year", "geo_id", "cip"] - table_vars: - geo_level: ["nation", "state", "puma"] - rca: - index: ["year", "geo_id"] - column: "cip" - values: [num_ppl] - sumlevel: - values: *defaults - yocd: - pk: ["year", "soc", "cip", "degree"] - depths: - soc: [0, 1, 2, 3] - rca: - index: ["year", "soc", "cip"] - column: "degree" - values: [num_ppl] - values: *defaults diff --git a/pums/occupation/est1.yaml b/pums/occupation/est1.yaml deleted file mode 100644 index f042cd9..0000000 --- a/pums/occupation/est1.yaml +++ /dev/null @@ -1,110 +0,0 @@ -default_values: &defaults - - - column: WAGP - name: avg_wage - apply_inflation: True - - - column: age - name: avg_age - - - column: WKHP - name: avg_hrs - -global: - seperator: "," - name: pums_beta - use_schema: True - source : "/tmp/pums_entire_usa///csv_pus.zip" - usecols: ['WKHP', 'FOD1P', 'RAC1P', 'ADJINC', 'ESR', 'PWGTP', 'SCHL', 'ST', 'WAGP', 'SOCP', 'NAICSP', 'SEX', 'AGEP', 'PUMA', 'pwgtp1', 'pwgtp2', 'pwgtp3', 'pwgtp4', 'pwgtp5', 'pwgtp6', 'pwgtp7', 'pwgtp8', 'pwgtp9', 'pwgtp10', 'pwgtp11', 'pwgtp12', 'pwgtp13', 'pwgtp14', 'pwgtp15', 'pwgtp16', 'pwgtp17', 'pwgtp18', 'pwgtp19', 'pwgtp20', 'pwgtp21', 'pwgtp22', 'pwgtp23', 'pwgtp24', 'pwgtp25', 'pwgtp26', 'pwgtp27', 'pwgtp28', 'pwgtp29', 'pwgtp30', 'pwgtp31', 'pwgtp32', 'pwgtp33', 'pwgtp34', 'pwgtp35', 'pwgtp36', 'pwgtp37', 'pwgtp38', 'pwgtp39', 'pwgtp40', 'pwgtp41', 'pwgtp42', 'pwgtp43', 'pwgtp44', 'pwgtp45', 'pwgtp46', 'pwgtp47', 'pwgtp48', 'pwgtp49', 'pwgtp50', 'pwgtp51', 'pwgtp52', 'pwgtp53', 'pwgtp54', 'pwgtp55', 'pwgtp56', 'pwgtp57', 'pwgtp58', 'pwgtp59', 'pwgtp60', 'pwgtp61', 'pwgtp62', 'pwgtp63', 'pwgtp64', 'pwgtp65', 'pwgtp66', 'pwgtp67', 'pwgtp68', 'pwgtp69', 'pwgtp70', 'pwgtp71', 'pwgtp72', 'pwgtp73', 'pwgtp74', 'pwgtp75', 'pwgtp76', 'pwgtp77', 'pwgtp78', 'pwgtp79', 'pwgtp80'] - source_vars: - year: [2013] - est: [1] - - archive_files: "csv" - strip_column_whitespace: True - force_column_uppercase: - exclude: ["year", "est", "geo_id", "naics", "num_households", "num_ppl", "num_records"] - output : "/tmp/pums_output///" - web_paths: - pums: "http://ftp.census.gov/acs_yr/pums/csv_pus.zip" - rename: - AGEP: age - SEX: sex - RAC1P: race - FOD1P: cip - SCHL: degree - type: - occp02: "str" - naicsp02: "str" - naicsp07: "str" - occp10 : "str" - ST: "str" - AGEP: "int" - RAC1P: "str" - SEX: "str" - - agg: "sum" - - import_to_db: True - db_settings: - user: postgres - password_env_var: DATAUSA_PW - host: 162.209.124.219 - db_name: datausa - type: - degree: text -tables: - yow: - pk: ["year", "soc", "wage_bin"] - depths: - soc: [0, 1, 2, 3] - wage_bin: True - values: *defaults - - yor: - pk: ["year", "soc", "race"] - depths: - soc: [0, 1, 2, 3] - values: *defaults - - yos: - pk: ["year", "soc", "sex"] - depths: - soc: [0, 1, 2, 3] - values: *defaults - - yoas: - pk: ["year", "soc", "age", "sex"] - depths: - soc: [0, 1, 2, 3] - values: *defaults - - yo: - pk: ["year", "soc"] - gini: True - depths: - soc: [0, 1, 2, 3] - values: *defaults - post_agg_transform: - - column: avg_wage_rank - type: rank - target: avg_wage - where: - column: num_records - func: gt - value: 5 - - column: num_ppl_rank - type: rank - target: num_ppl - where: - column: num_records - func: gt - value: 5 - - column: avg_age_rank - type: rank - target: avg_age - where: - column: num_records - func: gt - value: 5 - diff --git a/pums/person/est1.yaml b/pums/person/est1.yaml deleted file mode 100644 index 8cc58ff..0000000 --- a/pums/person/est1.yaml +++ /dev/null @@ -1,109 +0,0 @@ -default_values: &defaults - - - column: WAGP - name: avg_wage - apply_inflation: True - - - column: age - name: avg_age - - - column: WKHP - name: avg_hrs - -ygo: &ygo - pk: ["year", "geo_id", "soc"] - depths: - soc: [0, 1, 2, 3] - rca: - index: ["year", "geo_id"] - column: "soc" - values: [num_ppl] - sumlevel: - values: *defaults - -ygor: &ygor - pk: ["year", "geo_id", "soc", "race"] - depths: - soc: [0, 1, 2, 3] - sumlevel: - values: *defaults - -ygos: &ygos - pk: ["year", "geo_id", "soc", "sex"] - depths: - soc: [0, 1, 2, 3] - sumlevel: - values: *defaults - -ygi: &ygi - pk: ["year", "geo_id", "naics"] - depths: - naics: [0, 1, 2] - rca: - index: ["year", "geo_id"] - column: "naics" - values: [num_ppl] - sumlevel: - values: *defaults - -global: - seperator: "," - name: pums_beta - use_schema: True - source : "/tmp/pums_entire_usa///csv_pus.zip" - # source: "/tmp/test_.csv" - usecols: ['WKHP', 'FOD1P', 'RAC1P', 'ADJINC', 'ESR', 'PWGTP', 'SCHL', 'ST', 'WAGP', 'SOCP', 'NAICSP', 'SEX', 'AGEP', 'PUMA', 'pwgtp1', 'pwgtp2', 'pwgtp3', 'pwgtp4', 'pwgtp5', 'pwgtp6', 'pwgtp7', 'pwgtp8', 'pwgtp9', 'pwgtp10', 'pwgtp11', 'pwgtp12', 'pwgtp13', 'pwgtp14', 'pwgtp15', 'pwgtp16', 'pwgtp17', 'pwgtp18', 'pwgtp19', 'pwgtp20', 'pwgtp21', 'pwgtp22', 'pwgtp23', 'pwgtp24', 'pwgtp25', 'pwgtp26', 'pwgtp27', 'pwgtp28', 'pwgtp29', 'pwgtp30', 'pwgtp31', 'pwgtp32', 'pwgtp33', 'pwgtp34', 'pwgtp35', 'pwgtp36', 'pwgtp37', 'pwgtp38', 'pwgtp39', 'pwgtp40', 'pwgtp41', 'pwgtp42', 'pwgtp43', 'pwgtp44', 'pwgtp45', 'pwgtp46', 'pwgtp47', 'pwgtp48', 'pwgtp49', 'pwgtp50', 'pwgtp51', 'pwgtp52', 'pwgtp53', 'pwgtp54', 'pwgtp55', 'pwgtp56', 'pwgtp57', 'pwgtp58', 'pwgtp59', 'pwgtp60', 'pwgtp61', 'pwgtp62', 'pwgtp63', 'pwgtp64', 'pwgtp65', 'pwgtp66', 'pwgtp67', 'pwgtp68', 'pwgtp69', 'pwgtp70', 'pwgtp71', 'pwgtp72', 'pwgtp73', 'pwgtp74', 'pwgtp75', 'pwgtp76', 'pwgtp77', 'pwgtp78', 'pwgtp79', 'pwgtp80'] - source_vars: - year: [2013] - est: [1] - geo_level: ["nation", "state", "puma"] - # post_agg_add_pklengths: True - - archive_files: "csv" - strip_column_whitespace: True - force_column_uppercase: - exclude: ["year", "est", "geo_id", "naics", "num_households", "num_ppl", "num_records"] - output : "/tmp/pums_output///" - web_paths: - pums: "http://ftp.census.gov/acs_yr/pums/csv_pus.zip" - rename: - AGEP: age - SEX: sex - RAC1P: race - FOD1P: major - SCHL: degree - type: - occp02: "str" - naicsp02: "str" - naicsp07: "str" - occp10 : "str" - ST: "str" - AGEP: "int" - - agg: "sum" - - import_to_db: True - db_settings: - user: postgres - password_env_var: DATAUSA_PW - host: 162.209.124.219 - db_name: datausa - - deduplicate: True -tables: - yg__yr: - pk: ["year", "geo_id"] - sumlevel: - values: *defaults - gini: True - # ygos__yr: - # <<: *ygos - - # ygor__yr: - # <<: *ygor - - # ygi__yr: - # <<: *ygi - - # ygo__yr: - # <<: *ygo \ No newline at end of file diff --git a/pums/person/est3.yaml b/pums/person/est3.yaml deleted file mode 100644 index 3a98fca..0000000 --- a/pums/person/est3.yaml +++ /dev/null @@ -1,80 +0,0 @@ -global: - seperator: "," - name: pums - use_schema: True - source : "/tmp/pums_entire_usa///csv_pus.zip" - df_chunker: - func: src.plugins.census.pums.chunker.process - pk: [""] - # source: "/tmp/test_.csv" - source_vars: - year: - start: 2007 - end: 2007 - est: - start: 3 - end: 3 - - archive_files: "csv" - strip_column_whitespace: True - force_column_uppercase: - exclude: ["year", "est", "geo_id", "naics", "num_households", "num_ppl", "num_records"] - output : "/tmp/pums_output///" - web_paths: - "2005": "http://www2.census.gov/acs/downloads/pums//csv_pus.zip" - "2006": "http://www2.census.gov/acs/downloads/pums//csv_pus.zip" - pums: "http://ftp.census.gov/acs_yr/pums/csv_pus.zip" - # type must be the original name of the column (before rename) - type: - occp02: "str" - naicsp02: "str" - naicsp07: "str" - occp10 : "str" - ST: "str" - AGEP: "int" - - agg: "sum" - - import_to_db: True - db_settings: - user: postgres - password_env_var: DATAUSA_PW - host: 162.209.124.219 - db_name: datausa - - transform: - df: - type: frame_func - func: "plugins.census.pums.chunker.step2" - sumlevel: "puma" - values: - - - column: WAGP - name: avg_wage - - - column: AGEP - name: avg_age - depths: - year: ["identity"] - est: ["identity"] - naics: [2, True] - soc: [2, True] - geo_id: ["identity"] - - named_transforms: - pums_state: - type: frame_func - func: "plugins.census.pums.chunker.step2" - sumlevel: "state" - values: - - - column: WAGP - name: avg_wage - - - column: AGEP - name: avg_age - -tables: - ztest3_puma_yego: - pk: ["year", "est", "geo_id"] - diff --git a/pums/person/est5.yaml b/pums/person/est5.yaml deleted file mode 100644 index d2a99dc..0000000 --- a/pums/person/est5.yaml +++ /dev/null @@ -1,89 +0,0 @@ -global: - seperator: "," - name: pums - use_schema: True - source : "/tmp/pums_entire_usa///csv_pus.zip" - # source: "/tmp/test_.csv" - source_vars: - year: - start: 2005 - end: 2013 - est: - start: 5 - end: 5 - - archive_files: "csv" - strip_column_whitespace: True - force_column_uppercase: - exclude: ["year", "est", "geo_id", "naics", "num_households", "num_ppl", "num_records"] - output : "/tmp/pums_output///" - web_paths: - "2005": "http://www2.census.gov/acs/downloads/pums//csv_pus.zip" - "2006": "http://www2.census.gov/acs/downloads/pums//csv_pus.zip" - pums: "ftp://ftp.census.gov/acs_yr/pums/csv_pus.zip" - # type must be the original name of the column (before rename) - type: - occp02: "str" - naicsp02: "str" - naicsp07: "str" - occp10 : "str" - ST: "str" - AGEP: "int" - - agg: "sum" - - import_to_db: True - db_settings: - user: postgres - password_env_var: DATAUSA_PW - host: 162.209.124.219 - db_name: datausa - - transform: - df: - type: frame_func - func: "plugins.census.pums.process" - sumlevel: "puma" - values: - - - column: WAGP - name: avg_wage - - - column: AGEP - name: avg_age - depths: - year: ["identity"] - est: ["identity"] - naics: [2, True] - soc: [2, True] - geo_id: ["identity"] - - named_transforms: - pums_state: - type: frame_func - func: "plugins.census.pums.process" - sumlevel: "state" - values: - - - column: WAGP - name: avg_wage - - - column: AGEP - name: avg_age - - pums_nation: - type: frame_func - func: "plugins.census.pums.process" - sumlevel: "nation" - values: - - - column: WAGP - name: avg_wage - - - column: AGEP - name: avg_age - -tables: - puma_yego: - pk: ["year", "est", "geo_id", "soc"] - diff --git a/pums/person/pums.yaml b/pums/person/pums.yaml deleted file mode 100644 index c79b753..0000000 --- a/pums/person/pums.yaml +++ /dev/null @@ -1,118 +0,0 @@ -global: - seperator: "," - name: pums - use_schema: True - source : "/tmp/pums_entire_usa///csv_pus.zip" - # source: "/tmp/test_.csv" - usecols: ['PWGTP', 'SCHL', 'ST', 'WAGP', 'SOCP', 'NAICSP', 'SEX', 'AGEP', 'PUMA', 'PWGTP1', 'PWGTP2', 'PWGTP3', 'PWGTP4', 'PWGTP5', 'PWGTP6', 'PWGTP7', 'PWGTP8', 'PWGTP9', 'PWGTP10', 'PWGTP11', 'PWGTP12', 'PWGTP13', 'PWGTP14', 'PWGTP15', 'PWGTP16', 'PWGTP17', 'PWGTP18', 'PWGTP19', 'PWGTP20', 'PWGTP21', 'PWGTP22', 'PWGTP23', 'PWGTP24', 'PWGTP25', 'PWGTP26', 'PWGTP27', 'PWGTP28', 'PWGTP29', 'PWGTP30', 'PWGTP31', 'PWGTP32', 'PWGTP33', 'PWGTP34', 'PWGTP35', 'PWGTP36', 'PWGTP37', 'PWGTP38', 'PWGTP39', 'PWGTP40', 'PWGTP41', 'PWGTP42', 'PWGTP43', 'PWGTP44', 'PWGTP45', 'PWGTP46', 'PWGTP47', 'PWGTP48', 'PWGTP49', 'PWGTP50', 'PWGTP51', 'PWGTP52', 'PWGTP53', 'PWGTP54', 'PWGTP55', 'PWGTP56', 'PWGTP57', 'PWGTP58', 'PWGTP59', 'PWGTP60', 'PWGTP61', 'PWGTP62', 'PWGTP63', 'PWGTP64', 'PWGTP65', 'PWGTP66', 'PWGTP67', 'PWGTP68', 'PWGTP69', 'PWGTP70', 'PWGTP71', 'PWGTP72', 'PWGTP73', 'PWGTP74', 'PWGTP75', 'PWGTP76', 'PWGTP77', 'PWGTP78', 'PWGTP79', 'PWGTP80'] - source_vars: - year: - start: 2005 - end: 2013 - est: - start: 1 - end: 1 - - archive_files: "csv" - strip_column_whitespace: True - force_column_uppercase: - exclude: ["year", "est", "geo_id", "naics", "num_households", "num_ppl", "num_records"] - output : "/tmp/pums_output///" - web_paths: - "2005": "http://www2.census.gov/acs/downloads/pums//csv_pus.zip" - "2006": "http://www2.census.gov/acs/downloads/pums//csv_pus.zip" - pums: "ftp://ftp.census.gov/acs_yr/pums/csv_pus.zip" - # type must be the original name of the column (before rename) - type: - occp02: "str" - naicsp02: "str" - naicsp07: "str" - occp10 : "str" - ST: "str" - AGEP: "int" - - agg: "sum" - - import_to_db: True - db_settings: - user: postgres - password_env_var: DATAUSA_PW - host: 162.209.124.219 - db_name: datausa - - transform: - df: - type: frame_func - func: "plugins.census.pums.process" - sumlevel: "puma" - values: - - - column: WAGP - name: avg_wage - - - column: AGEP - name: avg_age - - named_transforms: - pums_state: - type: frame_func - func: "plugins.census.pums.process" - sumlevel: "state" - values: - - - column: WAGP - name: avg_wage - - - column: AGEP - name: avg_age - - pums_nation: - type: frame_func - func: "plugins.census.pums.process" - sumlevel: "nation" - values: - - - column: WAGP - name: avg_wage - - - column: AGEP - name: avg_age - -tables: - nation_yegi: - pk: ["year", "est", "geo_id", "naics"] - transform: - df: pums_nation - state_yegi: - pk: ["year", "est", "geo_id", "naics"] - transform: - df: pums_state - puma_yegi: - pk: ["year", "est", "geo_id", "naics"] - - - - nation_yego: - pk: ["year", "est", "geo_id", "soc"] - transform: - df: pums_nation - state_yego: - pk: ["year", "est", "geo_id", "soc"] - transform: - df: pums_state - puma_yego: - pk: ["year", "est", "geo_id", "soc"] - - - - nation_yegio: - pk: ["year", "est", "geo_id", "naics", "soc"] - transform: - df: pums_nation - state_yegio: - pk: ["year", "est", "geo_id", "naics", "soc"] - transform: - df: pums_state - puma_yegio: - pk: ["year", "est", "geo_id", "naics", "soc"] -