We're defining a robust version of a function that reads the HTML code from a given URL. Robust in the sense that we want it to handle situations where something either goes wrong (error) or not quite the way we planned it to (warning). The umbrella term for errors and warnings is condition
tryCatch
readUrl <- function(url) {
out <- tryCatch(
########################################################
# Try part: define the expression(s) you want to "try" #
########################################################
{
# Just to highlight:
# If you want to use more than one R expression in the "try part"
# then you'll have to use curly brackets.
# Otherwise, just write the single expression you want to try and
message("This is the 'try' part")
readLines(con = url, warn = FALSE)
},
########################################################################
# Condition handler part: define how you want conditions to be handled #
########################################################################
# Handler when a warning occurs:
warning = function(cond) {
message(paste("Reading the URL caused a warning:", url))
message("Here's the original warning message:")
message(cond)
# Choose a return value when such a type of condition occurs
return(NULL)
},
# Handler when an error occurs:
error = function(cond) {
message(paste("This seems to be an invalid URL:", url))
message("Here's the original error message:")
message(cond)
# Choose a return value when such a type of condition occurs
return(NA)
},
###############################################
# Final part: define what should happen AFTER #
# everything has been tried and/or handled #
###############################################
finally = {
message(paste("Processed URL:", url))
message("Some message at the end\n")
}
)
return(out)
}
Let's define a vector of URLs where one element isn't a valid URL
urls <- c(
"http://stat.ethz.ch/R-manual/R-devel/library/base/html/connections.html",
"http://en.wikipedia.org/wiki/Xz",
"I'm no URL"
)
And pass this as input to the function we defined above
y <- lapply(urls, readUrl)
# Processed URL: http://stat.ethz.ch/R-manual/R-devel/library/base/html/connections.html
# Some message at the end
#
# Processed URL: http://en.wikipedia.org/wiki/Xz
# Some message at the end
#
# URL does not seem to exist: I'm no URL
# Here's the original error message:
# cannot open the connection
# Processed URL: I'm no URL
# Some message at the end
#
# Warning message:
# In file(con, "r") : cannot open file 'I'm no URL': No such file or directory
length(y)
# [1] 3
head(y[[1]])
# [1] "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\">"
# [2] "<html><head><title>R: Functions to Manipulate Connections</title>"
# [3] "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">"
# [4] "<link rel=\"stylesheet\" type=\"text/css\" href=\"R.css\">"
# [5] "</head><body>"
# [6] ""
y[[3]]
# [1] NA