-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #13 from lsms-worldbank/identify-var-labels-follow…
…-regex Identify var labels follow regex
- Loading branch information
Showing
8 changed files
with
410 additions
and
17 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -46,6 +46,7 @@ | |
# Stata | ||
!/**/*.do | ||
!/**/*.ado | ||
!/**/sthlp/*.sthlp | ||
!src/stata.toc | ||
!src/*.pkg | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
capture program drop lbl_list_matching_vars
* lbl_list_matching_vars: list variables whose variable label matches a regex.
*
* Syntax:  lbl_list_matching_vars "regex" [, varlist(varlist) NEGate]
*   pattern    regular expression given as a quoted string; it is inserted
*              as-is as the second argument of ustrregexm()
*   varlist()  optional; restricts the search to these variables
*   negate     optional; select labelled variables whose label does NOT match
*
* Returns (r-class):
*   r(varlist)              names of the selected variables, space separated
*   r(count_regex_matches)  number of selected variables
*
* Only variables that carry a variable label are examined, because the
* candidate list comes from ds with has(varlabel). Unlabelled variables are
* therefore never returned, with or without negate.
program define lbl_list_matching_vars, rclass

  qui {

    version 14

    syntax anything (name=pattern), [varlist(varlist)] [NEGate]

    * get list of all labelled variables in scope (all variables when the
    * varlist option is empty)
    ds `varlist', has(varlabel)

    * copy the returned macro directly instead of evaluating it as an
    * expression: when ds finds nothing r(varlist) is undefined, and an
    * expression assignment would store a missing value (.) as a variable name
    local vars "`r(varlist)'"

    * 1 = keep variables whose label matches, 0 = keep those that do not
    local keep_if = mi("`negate'")

    local vars_w_match_lbl ""

    foreach var of local vars {

      * extract the variable label
      local var_label : variable label `var'

      * determine whether the label matches the user-provided regex pattern
      * compound double quotes keep labels that contain a double quote intact
      local lbl_matches = ustrregexm(`"`var_label'"', `pattern')

      * include the variable when the match result is the one requested
      if (`lbl_matches' == `keep_if') {
        local vars_w_match_lbl "`vars_w_match_lbl' `var'"
      }

    }

    * compute the number of matches
    local n_matches : list sizeof vars_w_match_lbl

    * return the varlist and count of matches
    return local varlist = "`vars_w_match_lbl'"
    return local count_regex_matches = "`n_matches'"

    * message about outcome
    if (`n_matches' >= 1) {
      noi di as result "Matches found (`n_matches' variables) :"
      noi di as result "`vars_w_match_lbl'"
    }
    else if (`n_matches' == 0) {
      noi di as error "No matching variables found"
      noi di as result "If this result is unexpected, please check the regular expression provided."
    }

  }

end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,20 +1,19 @@ | ||
* Kristoffer's root path | ||
if "`c(username)'" == "wb462869" { | ||
global clone "C:/Users/wb462869/github/labeller" | ||
} | ||
* Fill in your root path here | ||
if "`c(username)'" == "bbdaniels" { | ||
global clone "/Users/bbdaniels/GitHub/repkit" | ||
} | ||
* Kristoffer's root path | ||
if "`c(username)'" == "wb462869" { | ||
global clone "C:/Users/wb462869/github/labeller" | ||
} | ||
else if "`c(username)'" == "wb393438" { | ||
global clone "C:\Users\wb393438\stata_funs\labeller" | ||
} | ||
|
||
ad_setup, adf("${clone}") /// | ||
name("labeller") /// | ||
description("A packge with utility commands related to lables. Particularly, but not exclusively, in relation to data sets collected using SurveySolutions.") /// | ||
author("LSMS Worldbank") /// | ||
contact("[email protected]") /// | ||
url("https://github.com/lsms-worldbank/labeller") /// | ||
github | ||
// ad_setup, adf("${clone}") /// | ||
// name("labeller") /// | ||
// description("A packge with utility commands related to lables. Particularly, but not exclusively, in relation to data sets collected using SurveySolutions.") /// | ||
// author("LSMS Worldbank") /// | ||
// contact("[email protected]") /// | ||
// url("https://github.com/lsms-worldbank/labeller") /// | ||
// github | ||
|
||
ad_sthlp , adf("${clone}") | ||
ad_sthlp , adf("${clone}") | ||
|
||
//ad_command create reprun_dataline , adf("`repkit'") pkg(repkit) | ||
//ad_command create reprun_dataline , adf("`repkit'") pkg(repkit) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
# Title | ||
|
||
__lbl_list_matching_vars__ - Identify variables whose label matches a pattern. | ||
|
||
# Syntax | ||
|
||
__lbl_list_matching_vars__ _regexstring_, [__varlist(varlist)__ __**neg**ate__] | ||
|
||
| _options_ | Description | | ||
|-----------|-------------| | ||
| __varlist(varlist)__ | Restricts the scope of search to a variable list | | ||
| __**neg**ate__ | Returns variables whose label does __not__ match | | ||
|
||
# Description | ||
|
||
Stata provides relatively few and mostly imperfect tools for searching variable labels for a matching string. The [label](https://www.stata.com/manuals13/dlabel.pdf) tools do not offer any methods for querying the variable labels. The [lookfor](https://www.stata.com/manuals/dlookfor.pdf) command, while the nearest match, falls short in a few ways. First, it searches over both variable names and variable labels. Second, it does not provide a means for restricting the scope of search to a variable list. And third, it fails to provide a means for inverting the search (i.e., returning everything that does not match). | |
|
||
This command aims to fill the following gaps: | |
|
||
- Search over variable labels only | ||
- Specify search through (regex) patterns | ||
- Restrict search to an (optionally) user-provided variable list | ||
- Invert search to identify variables whose labels do not match | |
|
||
# Options | ||
|
||
__**neg**ate__ inverts the match. Rather than return variables with matching variable labels, this option returns variables whose variable label does not match. | |
|
||
__varlist__ restricts the scope of the search to the user-provided variable list. By default, `lbl_list_matching_vars` searches for matches in all variables in memory. With __varlist__, the scope of the search can be narrowed. | ||
|
||
# Examples | ||
|
||
## Example 1: Simple search | ||
|
||
``` | ||
* create a set of variables | ||
gen var1 = . | ||
gen var2 = . | ||
gen var3 = . | ||
gen var4 = . | ||
* apply variable labels | |
label variable var1 "First label" | ||
label variable var2 "2. label" | ||
label variable var3 "3. label" | ||
label variable var4 "Fourth label" | ||
* find variables whose label contains "First" | ||
lbl_list_matching_vars "First" | ||
``` | ||
|
||
## Example 2: Regex search | ||
|
||
``` | ||
* find variables whose labels start with a number | ||
lbl_list_matching_vars "^[0-9]" | ||
``` | ||
|
||
## Example 3: Restrict search to a variable list | ||
|
||
``` | ||
* find variables whose label starts with "F" in var1 - var3 | ||
lbl_list_matching_vars "^F", varlist(var1 - var3) | ||
``` | ||
|
||
## Example 4: Return variables whose labels do not match | ||
|
||
``` | ||
* find variables whose labels do NOT start with a number | ||
lbl_list_matching_vars "^[0-9]", negate | ||
``` | ||
|
||
# Feedback, bug reports and contributions | ||
|
||
Read more about these commands on [this repo](https://github.com/lsms-worldbank/labeller) where this package is developed. Please provide any feedback by [opening an issue](https://github.com/lsms-worldbank/labeller/issues). PRs with suggestions for improvements are also greatly appreciated. | ||
|
||
# Authors | ||
|
||
LSMS Team, The World Bank [email protected] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
{smcl} | ||
{* 01 Jan 1960}{...} | ||
{hline} | ||
{pstd}help file for {hi:labeller}{p_end} | ||
{hline} | ||
|
||
{title:Title} | ||
|
||
{phang}{bf:labeller} - Package command with utilities for the rest of the package | ||
{p_end} | ||
|
||
{title:Syntax} | ||
|
||
{phang}{bf:labeller} | ||
{p_end} | ||
|
||
{title:Description} | ||
|
||
{pstd}This command only returns the version number and version data to the user. | ||
This command has little application for the user. | ||
For packages installed on SSC it is important that there is a command | |
in the package that has the same name as the package. | ||
That is the main purpose of this command. | ||
{p_end} | ||
|
||
{title:Options} | ||
|
||
{pstd}This command has no options. | ||
{p_end} | ||
|
||
{title:Feedback, bug reports and contributions} | ||
|
||
{pstd}Read more about the commands in this package at https://github.com/lsms-worldbank/labeller. | ||
{p_end} | ||
|
||
{pstd}Please provide any feedback by opening an issue at https://github.com/lsms-worldbank/labeller/issues. | |
{p_end} | ||
|
||
{pstd}PRs with suggestions for improvements are also greatly appreciated. | ||
{p_end} | ||
|
||
{title:Authors} | ||
|
||
{pstd}LSMS Team, The World Bank [email protected] | ||
{p_end} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
{smcl} | ||
{* 01 Jan 1960}{...} | ||
{hline} | ||
{pstd}help file for {hi:lbl_list_matching_vars}{p_end} | ||
{hline} | ||
|
||
{title:Title} | ||
|
||
{phang}{bf:lbl_list_matching_vars} - Identify variables whose label matches a pattern. | ||
{p_end} | ||
|
||
{title:Syntax} | ||
|
||
{phang}{bf:lbl_list_matching_vars} {it:regexstring}, [{bf:varlist(varlist)} {bf:{ul:neg}ate}] | ||
{p_end} | ||
|
||
{synoptset 16}{...} | ||
{synopthdr:options} | ||
{synoptline} | ||
{synopt: {bf:varlist(varlist)}}Restricts the scope of search to a variable list{p_end} | ||
{synopt: {bf:{ul:neg}ate}}Returns variables whose label does {bf:not} match{p_end} | ||
{synoptline} | ||
|
||
{title:Description} | ||
|
||
{pstd}Stata provides relatively few and mostly imperfect tools for searching variable labels for a matching string. The {browse "https://www.stata.com/manuals13/dlabel.pdf":label} tools do not offer any methods for querying the variable labels. The {browse "https://www.stata.com/manuals/dlookfor.pdf":lookfor} command, while the nearest match, falls short in a few ways. First, it searches over both variable names and variable labels. Second, it does not provide a means for restricting the scope of search to a variable list. And third, it fails to provide a means for inverting the search (i.e., returning everything that does not match). | |
{p_end} | ||
|
||
{pstd}This command aims to fill the following gaps: | |
{p_end} | ||
|
||
{pstd}- Search over variable labels only | ||
- Specify search through (regex) patterns | ||
- Restrict search to an (optionally) user-provided variable list | ||
- Invert search to identify variables whose labels do not match | |
{p_end} | ||
|
||
{title:Options} | ||
|
||
{pstd}{bf:{ul:neg}ate} inverts the match. Rather than return variables with matching variable labels, this option returns variables whose variable label does not match. | |
{p_end} | ||
|
||
{pstd}{bf:varlist} restricts the scope of the search to the user-provided variable list. By default, {inp:lbl_list_matching_vars} searches for matches in all variables in memory. With {bf:varlist}, the scope of the search can be narrowed. | ||
{p_end} | ||
|
||
{title:Examples} | ||
|
||
{dlgtab:Example 1: Simple search} | ||
|
||
{input}{space 8}* create a set of variables | ||
{space 8}gen var1 = . | ||
{space 8}gen var2 = . | ||
{space 8}gen var3 = . | ||
{space 8}gen var4 = . | ||
{space 8} | ||
{space 8}* apply variable labels | |
{space 8}label variable var1 "First label" | ||
{space 8}label variable var2 "2. label" | ||
{space 8}label variable var3 "3. label" | ||
{space 8}label variable var4 "Fourth label" | ||
{space 8} | ||
{space 8}* find variables whose label contains "First" | ||
{space 8}lbl_list_matching_vars "First" | ||
{text} | ||
{dlgtab:Example 2: Regex search} | ||
|
||
{input}{space 8}* find variables whose labels start with a number | ||
{space 8}lbl_list_matching_vars "^[0-9]" | ||
{text} | ||
{dlgtab:Example 3: Restrict search to a variable list} | ||
|
||
{input}{space 8}* find variables whose label starts with "F" in var1 - var3 | ||
{space 8}lbl_list_matching_vars "^F", varlist(var1 - var3) | ||
{text} | ||
{dlgtab:Example 4: Return variables whose labels do not match} | ||
|
||
{input}{space 8}* find variables whose labels do NOT start with a number | ||
{space 8}lbl_list_matching_vars "^[0-9]", negate | ||
{text} | ||
{title:Feedback, bug reports and contributions} | ||
|
||
{pstd}Read more about these commands on {browse "https://github.com/lsms-worldbank/labeller":this repo} where this package is developed. Please provide any feedback by {browse "https://github.com/lsms-worldbank/labeller/issues":opening an issue}. PRs with suggestions for improvements are also greatly appreciated. | ||
{p_end} | ||
|
||
{title:Authors} | ||
|
||
{pstd}LSMS Team, The World Bank [email protected] | ||
{p_end} |
Oops, something went wrong.