merge(x, y, ...)
merge.default(x, y, ...)
merge.data.frame(x, y, by = intersect(names(x), names(y)), by.x = by,
    by.y = by, all = FALSE, all.x = all, all.y = all, sort = TRUE,
    suffixes = c(".x", ".y"), incomparables = NULL, ...)
| x | a data frame, or data to be converted into a data frame. | 
| y | a data frame, or data to be converted into a data frame. | 
| by | a vector of columns by which to match.  This can be a vector of column
names, of column numbers, or of a logical vector with a TRUE or FALSE for
each column, specifying which columns to match by.
The special name row.names indicates to match by the row names
of the data frames.  In that case, a new column called Row.names is created. If you supply by, it is used for both x and y. If the by columns have different names or locations in x and y, then use by.x and by.y. The default value is the vector of column names that are common to x and y. | 
| by.x | see by. | 
| by.y | see by. | 
| all | specifies both all.x=TRUE and all.y=TRUE. | 
| all.x | a logical value; if TRUE, extra rows are added to the output, one for each row in x that has no matching row in y. These rows have NAs in the columns that are usually filled with values from y. The default is FALSE, so only rows with data from both x and y are included in the output. | 
| all.y | a logical value. Analogous to all.x, controlling when the output contains rows for y when no rows match an x row. | 
| sort | a logical flag. If TRUE (the default), indicates that the result should be sorted by by columns. | 
| suffixes | a character vector containing two distinct strings. If x and y have any column names in common, and those columns are not used for matching, the output has two columns with the same name, which is not allowed for data frames. merge pastes suffixes on these repeated column names to make them unique. The default is c(".x",".y"). | 
| incomparables | a vector of values that cannot be matched. If incomparables is NULL (the default) or FALSE, all values can be matched. See match for more details. | 
| ... | other arguments to be passed to or from methods. | 
# Create 2 data frames, one with information on authors
# and one concerning books.  Use merge to relate the
# names of the books written to attributes of the authors.
# Note that some authors have no books listed and some have
# several books.
authors <- data.frame(
    FirstName=c("Lorne", "Loren", "Robin",
                  "Robin", "Billy"),
    LastName=c("Green", "Jaye", "Green",
                 "Howe", "Jaye"),
    Age=c(82, 40, 45, 2, 40),
    Income=c(1200000, 40000, 25000, 0, 27500),
    Home=c("California", "Washington", "Washington",
    	"Alberta", "Washington"))
books <- data.frame(
    AuthorFirstName=c("Lorne", "Loren", "Loren",
        "Loren", "Robin", "Rich"),
    AuthorLastName=c("Green", "Jaye", "Jaye", "Jaye",
    	"Green", "Calaway"),
    Book=c("Bonanza", "Midwifery", "Gardening",
	"Perennials", "Who_dun_it?", "Support"))
# Look at all cases in which the author is in both the
# authors and books datasets.  Match author by both first
# and last names -- these have different labels in the 2
# datasets but are in the first 2 columns of both.
merge(authors, books, by=1:2)
# Produces the following output:
#   FirstName LastName Age  Income       Home        Book
# 1     Loren     Jaye  40   40000 Washington   Midwifery
# 2     Loren     Jaye  40   40000 Washington   Gardening
# 3     Loren     Jaye  40   40000 Washington  Perennials
# 4     Lorne    Green  82 1200000 California     Bonanza
# 5     Robin    Green  45   25000 Washington Who_dun_it?
# Next, make sure all authors in the authors dataset are
# listed, even if there is no book listed for them.  Using
# by.x and by.y may be a more reliable way to handle
# cases in which the datasets have different  names for
# the columns to match by.
merge(authors, books, by.x=c("FirstName", "LastName"),
      by.y=c("AuthorFirstName", "AuthorLastName"),
      all.x=TRUE)
# Produces the following:
#   FirstName LastName Age  Income       Home        Book
# 1     Billy     Jaye  40   27500 Washington        <NA>
# 2     Loren     Jaye  40   40000 Washington   Midwifery
# 3     Loren     Jaye  40   40000 Washington   Gardening
# 4     Loren     Jaye  40   40000 Washington  Perennials
# 5     Lorne    Green  82 1200000 California     Bonanza
# 6     Robin    Green  45   25000 Washington Who_dun_it?
# 7     Robin     Howe   2       0    Alberta        <NA>
merge(authors, books, by = 1:2, sort = FALSE)
# Produces the following:
#   FirstName LastName Age  Income       Home        Book
# 1     Lorne    Green  82 1200000 California     Bonanza
# 2     Loren     Jaye  40   40000 Washington   Midwifery
# 3     Loren     Jaye  40   40000 Washington   Gardening
# 4     Loren     Jaye  40   40000 Washington  Perennials
# 5     Robin    Green  45   25000 Washington Who_dun_it?
merge(authors, books, by = "row.names")
# Produces the following:
# Row.names FirstName LastName Age  Income  Home AuthorFirstName AuthorLastName Book
# 1  1  Lorne   Green  82 1200000 California   Lorne  Green  Bonanza
# 2  2  Loren    Jaye  40   40000 Washington   Loren  Jaye   Midwifery
# 3  3  Robin   Green  45   25000 Washington   Loren  Jaye   Gardening
# 4  4  Robin    Howe   2       0    Alberta   Loren  Jaye  Perennials
# 5  5  Billy    Jaye  40   27500 Washington   Robin  Green Who_dun_it?
#  "Loren" is not matched.  3 rows are gotten.
merge(authors, books, by.x = 1, by.y = 1, incomparables = c("Loren"))
# Produces the following:
#  FirstName LastName Age  Income    Home AuthorLastName   Book
# 1     Lorne    Green  82 1200000 California   Green    Bonanza
# 2     Robin    Green  45   25000 Washington   Green Who_dun_it?
# 3     Robin     Howe   2       0    Alberta   Green Who_dun_it?
# Use the state.x77 dataset to relate the income of the author
# to the median income of his or her home state (we have no
# information on Alberta, a Canadian province).  Note the use
# of "row.names" where the "column" to match on is not a variable
# in the dataset but is the names of the rows.  Both datasets have
# a column called "Income" which is not a key variable, so supply
# the suffixes argument to distinguish between them in the
# output (without suffixes they would be labeled "Income.x"
# and "Income.y").
state.data <- data.frame(Sdatasets::state.x77)
merge(authors, state.data[, "Income", drop=FALSE],
      by.x="Home", by.y="row.names", all.x=TRUE,
      suffixes=c("Author", "State"))
##         Home FirstName LastName Age IncomeAuthor IncomeState
## 1    Alberta     Robin     Howe   2            0          NA
## 2 California     Lorne    Green  82      1200000        5114
## 3 Washington     Loren     Jaye  40        40000        4864
## 4 Washington     Robin    Green  45        25000        4864
## 5 Washington     Billy     Jaye  40        27500        4864