grep(pattern, x, ignore.case = FALSE, perl = FALSE,
value = FALSE, fixed = FALSE, useBytes = FALSE, invert = FALSE)
grepl(pattern, x, ignore.case = FALSE, perl = FALSE,
fixed = FALSE, useBytes = FALSE)
regexec(pattern, text, ignore.case = FALSE, perl = FALSE,
fixed = FALSE, useBytes = FALSE)
| pattern | a character string specifying the pattern to search for. The interpretation of pattern is controlled by the values of the perl and fixed arguments. See regexpr for details. |
| x, text | a vector of character strings in which to search. |
| ignore.case | a logical value. If TRUE, uppercase and lowercase characters are considered equivalent when matching. The default is FALSE. |
| perl | A logical value. If TRUE, the pattern is interpreted as a perl-compatible regular expression. If FALSE (the default), the pattern is interpreted as a POSIX extended regular expression. |
| value | A logical value. If TRUE, grep returns the matched elements of x themselves. If FALSE (the default), grep returns the indices of the matched elements of x. |
| fixed | A logical value. If TRUE, the pattern is treated as a literal sequence of characters. If FALSE (the default), the pattern is treated as a regular expression. |
| useBytes | a logical value. If TRUE, then the x and pattern strings are treated as a simple sequence of bytes. If FALSE, and if any of the x or pattern strings have 'bytes' encoding (see Encoding), then useBytes is set to TRUE. |
| invert | A logical value. If TRUE, returns items that do not match the pattern. If FALSE (the default), returns only items that match the pattern. |
| grep | if value=FALSE, returns a numeric vector indicating which elements of x matched pattern. (numeric(0) specifies no matches.) If value=TRUE, returns the matching elements of x. (If they are not character data, they are converted to character data.) |
| grepl | returns a logical vector indicating which elements of x matched pattern. These return values can be used as a subscript to retrieve the matching elements of x. |
| regexec | returns a list of the same length as text. Each element is a sequence of integers giving the starting positions of the match and of all substrings corresponding to parenthesized subexpressions of pattern, and carries the attribute "match.length", a vector of integers giving the lengths of those matches. For an element of text with no match, the corresponding list element is -1. |
grep("ia$", Sdatasets::state.name, value = TRUE)
# returns all states whose names end in "ia"
grep("I", Sdatasets::state.name, value = TRUE, ignore.case = TRUE)
# returns all states containing "I" or "i"
grep("^[AEIOUY].*[aeiouy]$", Sdatasets::state.name, value = TRUE)
# returns states that begin and end with a vowel
grep("^[AEIOUY]|[aeiouy]$", Sdatasets::state.name, value = TRUE)
# returns states that begin or end with a vowel (or both)
grep("^[AEIOUY]|[aeiouy]$", Sdatasets::state.name, value = TRUE,
  invert = TRUE)
# invert = TRUE negates the match above:
# returns states that neither begin nor end with a vowel
grep("[aeiouy]{3,}", Sdatasets::state.name, value = TRUE)
# names with 3 or more lowercase vowels in a row
grep("^([^aeiouy][aeiouy]+)*$", Sdatasets::state.name, ignore.case = TRUE,
  value = TRUE)
# names made up entirely of groups of one non-vowel character followed by
# one or more vowels. Such a name must start with a non-vowel, so a name
# like "Ohio" does not match even though its only consonant is followed by
# a vowel. A space counts as a non-vowel character here.
numStrings <- c("+1", "-10", "+3", "0")
# 3 ways to get items starting with a plus sign.
# "+" is a regular-expression quantifier, so it is written as "\\+" to
# match a literal plus sign.
numStrings[grep("^\\+", numStrings)]   # subscript with the matching indices
numStrings[grepl("^\\+", numStrings)]  # subscript with a logical vector
grep("^\\+", numStrings, value = TRUE) # return the matching elements directly
# each returns:
# [1] "+1" "+3"
# using a backslash with grep:
# (the vector is not named "str", which would mask the utils::str function)
strings <- c("SP500", "S.P500")
grep("^S.", strings) # S followed by any character
# [1] 1 2
grep("^S\\.", strings) # S followed by a literal period
# [1] 2
# regexec examples: each result is a list with one element per string in x.
x <- c("S500S500", "SP500")
# Pattern with two parenthesized subexpressions: a leading run of "S",
# then a single digit.
regexec("(^S+)([0-9])", x)
# Same first subexpression; the second one now also takes in the rest of
# the string after the first digit.
regexec("(^S+)([0-9].*)", x)
# Pattern without parenthesized subexpressions: only the overall match is
# reported.
regexec("^S+[0-9]", x)