Skip to content

Commit

Permalink
Merge branch 'main' into flag-vars-without-var-lbl
Browse files Browse the repository at this point in the history
  • Loading branch information
kbjarkefur authored Jan 16, 2024
2 parents 3fc2011 + ce11672 commit e11e6c5
Show file tree
Hide file tree
Showing 16 changed files with 888 additions and 14 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
# Stata
!/**/*.do
!/**/*.ado
!/**/sthlp/*.sthlp
!src/stata.toc
!src/*.pkg

Expand Down
6 changes: 3 additions & 3 deletions run-adodown-util.do
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@
global clone "C:/Users/wb462869/github/labeller"
}
else if "`c(username)'" == "wb393438" {
global clone "C:\Users\wb393438\stata_funs\labeller\labeller"
}

global clone "C:\Users\wb393438\stata_funs\labeller"
}
/*
ad_setup, adf("${clone}") ///
name("labeller") ///
Expand Down
28 changes: 28 additions & 0 deletions src/ado/lbl_assert_no_long_varlbl.ado
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
cap program drop lbl_assert_no_long_varlbl
program define lbl_assert_no_long_varlbl, rclass

  * Purpose:
  *   Assert that no variable in the varlist (all variables when omitted)
  *   has a variable label whose length is >= maxlen characters.
  *
  * Syntax:
  *   lbl_assert_no_long_varlbl [varlist] [, MAXlen(integer 80)]
  *
  * Returned results:
  *   r(varlist)        variables whose labels are >= maxlen characters
  *   r(count_matches)  number of such variables
  *
  * Errors:
  *   exits through -error 9- when at least one such variable is found.
  *
  * Depends on: lbl_list_long_varlbl (does the actual search).

  version 14

  syntax [varlist], [MAXlen(integer 80)]

  qui {

    * look for variables whose labels >= max length
    lbl_list_long_varlbl `varlist', maxlen(`maxlen')

    * copy the r() results immediately, before any other command can
    * overwrite them
    local n_max_len = `r(count_matches)'
    local which_max_len "`r(varlist)'"

    * return results: the count is the real number of offending variables,
    * in line with what lbl_list_long_varlbl returns under the same name
    return local varlist "`which_max_len'"
    return local count_matches "`n_max_len'"

    * if any variables with long labels found, message and error
    if (`n_max_len' > 0) {
      di as error "{pstd}Variables found whose labels are >= `maxlen' characters:{p_end}"
      di as error "{phang}`which_max_len'{p_end}"
      error 9
    }

  }

end
48 changes: 48 additions & 0 deletions src/ado/lbl_list_long_varlbl.ado
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
*! version XX XXXXXXXXX ADAUTHORNAME ADCONTACTINFO

cap program drop lbl_list_long_varlbl
program define lbl_list_long_varlbl, rclass

  * Purpose:
  *   List the variables in the varlist (all variables when omitted) whose
  *   variable label is >= maxlen characters long. Length is measured in
  *   Unicode characters (ustrlen), not bytes.
  *
  * Syntax:
  *   lbl_list_long_varlbl [varlist] [, MAXlen(integer 80)]
  *
  * Returned results:
  *   r(varlist)        variables whose labels are >= maxlen characters
  *   r(count_matches)  number of such variables
  *
  * The outcome is also displayed, even though the body runs quietly.

  version 14

  syntax [varlist], [MAXlen(integer 80)]

  qui {

    * keep only the variables that actually carry a variable label
    ds `varlist', has(varlabel)

    * copy r(varlist) as a macro rather than evaluating it as an expression:
    * when ds matches nothing, the expression form yields "." and the loop
    * below would then look for a variable called "."
    local vars "`r(varlist)'"

    * initialize list of variables with labels that are too long
    local vars_lbl_too_long ""

    * populate list of variables
    foreach var of local vars {

      * extract variable label
      local var_lbl : variable label `var'

      * if length is greater than or equal to max, put in list
      if (`: ustrlen local var_lbl' >= `maxlen') {
        local vars_lbl_too_long "`vars_lbl_too_long' `var'"
      }
    }

    * compute the number of matches
    local n_matches : list sizeof vars_lbl_too_long

    * return the varlist and count of matches
    return local varlist "`vars_lbl_too_long'"
    return local count_matches "`n_matches'"

    * message about outcome
    if (`n_matches' >= 1) {
      noi di as result "{pstd}Variables with at least `maxlen' characters found (`n_matches' variables) :{p_end}"
      noi di as result "{phang}`vars_lbl_too_long'{p_end}"
    }
    else if (`n_matches' == 0) {
      noi di as result "{pstd}No variables found with a label >= `maxlen' characters found{p_end}"
    }
  }

end
53 changes: 53 additions & 0 deletions src/ado/lbl_list_matching_vars.do
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
capture program drop lbl_list_matching_vars
program define lbl_list_matching_vars, rclass

  * Purpose:
  *   List the labelled variables whose variable label matches (or, with
  *   the negate option, does not match) a regular expression.
  *
  * Syntax:
  *   lbl_list_matching_vars "regex" [, varlist(varlist) NEGate]
  *
  *   The pattern is inserted verbatim as the second argument of
  *   ustrregexm(), so it must be supplied as a quoted string.
  *
  * Returned results:
  *   r(varlist)              variables selected
  *   r(count_regex_matches)  number of variables selected
  *
  * Only variables that have a variable label are considered, also when
  * negate is specified.

  * declare the version before anything else is parsed
  version 14

  qui {

    syntax anything(name=pattern), [varlist(varlist)] [NEGate]

    * get list of all (matching) variables that carry a label
    ds `varlist', has(varlabel)

    * copy r(varlist) as a macro rather than evaluating it as an expression:
    * when ds matches nothing, the expression form yields "." and the loop
    * below would then look for a variable called "."
    local vars "`r(varlist)'"

    local vars_w_match_lbl ""

    foreach var of local vars {

      * extract the variable label
      local var_label : variable label `var'

      * determine whether the label matches the user-provided regex pattern;
      * compound quotes keep labels that contain double quotes intact
      local lbl_matches = ustrregexm(`"`var_label'"', `pattern')

      * include the variable name in list of variables with matching labels
      if (mi("`negate'") & (`lbl_matches' == 1)) {
        local vars_w_match_lbl "`vars_w_match_lbl' `var'"
      }
      else if (!mi("`negate'") & (`lbl_matches' == 0)) {
        local vars_w_match_lbl "`vars_w_match_lbl' `var'"
      }

    }

    * compute the number of matches
    local n_matches : list sizeof vars_w_match_lbl

    * return the varlist and count of matches
    return local varlist "`vars_w_match_lbl'"
    return local count_regex_matches "`n_matches'"

    * message about outcome
    if (`n_matches' >= 1) {
      noi di as result "Matches found (`n_matches' variables) :"
      noi di as result "`vars_w_match_lbl'"
    }
    else if (`n_matches' == 0) {
      noi di as error "No matching variables found"
      noi di as result "If this result is unexpected, please check the regular expression provided."
    }

  }

end
23 changes: 12 additions & 11 deletions src/dev/run-adodown-util.do
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,19 @@
global clone "C:/Users/wb462869/github/labeller"
}
* Fill in your root path here
if "`c(username)'" == "bbdaniels" {
global clone "/Users/bbdaniels/GitHub/repkit"
if "`c(username)'" == "wb393438" {
global clone "C:\Users\wb393438\stata_funs\labeller"
}

ad_setup, adf("${clone}") ///
name("labeller") ///
description("A packge with utility commands related to lables. Particularly, but not exclusively, in relation to data sets collected using SurveySolutions.") ///
author("LSMS Worldbank") ///
contact("[email protected]") ///
url("https://github.com/lsms-worldbank/labeller") ///
github

ad_sthlp , adf("${clone}")
// ad_setup, adf("${clone}") ///
// name("labeller") ///
// description("A packge with utility commands related to lables. Particularly, but not exclusively, in relation to data sets collected using SurveySolutions.") ///
// author("LSMS Worldbank") ///
// contact("[email protected]") ///
// url("https://github.com/lsms-worldbank/labeller") ///
// github

//ad_command create reprun_dataline , adf("`repkit'") pkg(repkit)
ad_sthlp , adf("${clone}")

//ad_command create reprun_dataline , adf("`repkit'") pkg(repkit)
4 changes: 4 additions & 0 deletions src/labeller.pkg
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,14 @@ d
f ado/lbl_assert_var_have_lbl.ado
f ado/lbl_list_no_var_lbl.ado
f ado/labeller.ado
f ado/lbl_assert_no_long_varlbl.ado
f ado/lbl_list_long_varlbl.ado

*** helpfiles
f sthlp/lbl_assert_var_have_lbl.sthlp
f sthlp/lbl_list_no_var_lbl.sthlp
f sthlp/lbl_assert_no_long_varlbl.sthlp
f sthlp/lbl_list_long_varlbl.sthlp
f sthlp/labeller.sthlp

*** ancillaryfiles
Expand Down
59 changes: 59 additions & 0 deletions src/mdhlp/lbl_assert_no_long_varlbl.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# Title

__lbl_assert_no_long_varlbl__ - Assert that there is no variable in memory whose variable label length reaches or exceeds the desired character length.

# Syntax

__lbl_assert_no_long_varlbl__ , __**max**len__(_integer_)

| _options_ | Description |
|-----------|-------------|
| __**max**len__(_integer_) | Maximum character length allowed.

# Description

This command asserts that there is no variable in memory whose variable label length reaches or exceeds the desired character length.

By default, the command takes the maximum length to be Stata's maximum length for labels: 80 characters. If desired, the user can specify an alternative length through the __**max**len__(_integer_) option.

If there is at least one variable whose label length reaches or exceeds the maximum length, the command will return an error and list the variables whose variable labels are too long.

# Options

__**max**len__(_integer_) sets the maximum length of variable labels.

# Examples

```
* create set of variables
gen var1 = .
gen var2 = .
gen var3 = .
gen var4 = .
gen var5 = .
* apply variable labels
label variable var1 "Short label"
label variable var2 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
label variable var3 "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"
label variable var4 "Another short label"
label variable var5 "你好你好你好你好你好你好你好你好你好你好你好你好你好你好你好你好你好你好你好你好你好你好你好你好你好你好你好你好你好你好你好你好你好你好你好你好你好你好你好你好"
* assert no variables with labels at least as long as the default max length (80 characters)
lbl_assert_no_long_varlbl
* assert no variables with labels at least as long as the user-specified max length (12 characters)
lbl_assert_no_long_varlbl, maxlen(12)
```

# Feedback, bug reports and contributions

Read more about the commands in this package at https://github.com/lsms-worldbank/labeller.

Please provide any feedback by opening an issue at https://github.com/lsms-worldbank/labeller/issues.

PRs with suggestions for improvements are also greatly appreciated.

# Authors

LSMS Team, The World Bank [email protected]
63 changes: 63 additions & 0 deletions src/mdhlp/lbl_list_long_varlbl.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# Title

__lbl_list_long_varlbl__ - List variables whose variable label is at least as long as the desired character length.

# Syntax

__lbl_list_long_varlbl__ , __**max**len__(_integer_)

| _options_ | Description |
|-----------|-------------|
| __**max**len__(_integer_) | Maximum character length allowed.

# Description

When variable labels are too long, Stata truncates them to the first 80 characters of the string provided. This situation might arise for data exported from Survey Solutions. If provided, Survey Solutions uses the Variable label field in Designer, whose length is capped at 80 characters (in line with Stata's limits). If no label is specified in that field, Survey Solutions uses the Question text field, whose maximum length is 2,000 characters. In the latter case, Survey Solutions uses the first 80 characters of the question text as its label.

To detect possible cases of truncation, data producers can check the length of each variable label individually (e.g., `local var_lbl : variable label my_var; local lbl_len : ustrlen local var_lbl`).

However, there is no base Stata operation for doing so in batch.

This command provides just such a tool.

By default, the command takes the maximum length to be Stata's maximum length for labels: 80 characters. If desired, the user can specify an alternative length through the __**max**len__(_integer_) option.

# Options

__**max**len__(_integer_) sets the maximum length of variable labels, at or beyond which a variable is listed by this command.

# Examples

```
* create set of variables
gen var1 = .
gen var2 = .
gen var3 = .
gen var4 = .
gen var5 = .
* apply variable labels
label variable var1 "Short label"
label variable var2 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
label variable var3 "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"
label variable var4 "Another short label"
label variable var5 "你好你好你好你好你好你好你好你好你好你好你好你好你好你好你好你好你好你好你好你好你好你好你好你好你好你好你好你好你好你好你好你好你好你好你好你好你好你好你好你好"
* list variables with labels at least as long as the default max length (80 characters)
lbl_list_long_varlbl
* list variables with labels at least as long as the user-specified max length
lbl_list_long_varlbl, maxlen(12)
```

# Feedback, bug reports and contributions

Read more about the commands in this package at https://github.com/lsms-worldbank/labeller.

Please provide any feedback by opening an issue at https://github.com/lsms-worldbank/labeller/issues.

PRs with suggestions for improvements are also greatly appreciated.

# Authors

LSMS Team, The World Bank [email protected]
Loading

0 comments on commit e11e6c5

Please sign in to comment.