0

Can someone explain why the marked line below returns numbers instead of setting the column names the strings in that row? How would I get the correct column names if I include the commented out colClasses line when reading the table?

url<-'http://qpublic7.qpublic.net/ga_subdivison.php?county=ga_clarke&searchType=nbhd&numberValue=4025R&nameValue=&sectionValue=&townshipValue=&rangeValue=&startDate=01-1998&endDate=&startPrice=&endPrice=&startArea=&endArea=&startAcreage=&endAcreage=&saleQualification=All&saleVacant=All&propertyType=All&reasonType=All&start=0'
library(XML)
#colClasses = c("character","character","character","character","integer","integer","integer","character","character","integer","character","character"),
data<-readHTMLTable(url,header=F,as.data.frame=T)
View(data)
csv<-as.data.frame(data)
colnames(csv)<-csv[4,] #why does this line return numbers?
colnames(csv)<-gsub(" ","",colnames(csv))
View(head(csv))
csv<-csv[-c(1:4),]
#####
View(csv)
1
  • 1
    FWIW, spaces aren't taxed in most jurisdictions. Commented Mar 13, 2016 at 23:14

1 Answer 1

1

You were bit by stringsAsFactors. Plus, your as.data.frame call was not the right thing to do (look at the data structure with str vs View).

library(XML)

URL <- 'http://qpublic7.qpublic.net/ga_subdivison.php?county=ga_clarke&searchType=nbhd&numberValue=4025R&nameValue=&sectionValue=&townshipValue=&rangeValue=&startDate=01-1998&endDate=&startPrice=&endPrice=&startArea=&endArea=&startAcreage=&endAcreage=&saleQualification=All&saleVacant=All&propertyType=All&reasonType=All&start=0'

csv <- readHTMLTable(URL, header=FALSE, as.data.frame=TRUE, stringsAsFactors=FALSE)[[2]]

colnames(csv) <- csv[4,]
colnames(csv) <- gsub(" ", "", colnames(csv))

csv <- csv[-c(1:4),]

dplyr::glimpse(csv)

## Observations: 52
## Variables: 11
## $ \/ParcelNumber\/ (chr) "173C2 F023", "173C2 G009", "173C2 G007", "17...
## $ PropertyType       (chr) "R", "R", "R", "R", "R", "R", "R", "R", "R"...
## $ SaleDate           (chr) "12-2015", "08-2015", "08-2015", "07-2015",...
## $ SalePrice          (chr) "200,000", "265,000", "210,000", "188,000",...
## $ HeatedSqFt         (chr) "1,538", "1,756", "1,415", "1,125", "1,559"...
## $ Acres              (chr) "0.30", "0.37", "0.37", "0.38", "0.32", "0....
## $ SaleQual           (chr) "Q", "Q", "Q", "Q", "Q", "Q", "U", "Q", "Q"...
## $ Reason             (chr) "FM", "FM", "FM", "FM", "FM", "FM", "B", "F...
## $ YearBuilt          (chr) "1952", "1954", "1963", "1963", "1998", "19...
## $ LocationAddress    (chr) "155 HARDIN DR", "140 HARDIN DR", "150 HARD...
## $ Neighborhood       (chr) "4025R-RIVERDALE", "4025R-RIVERDALE", "4025...


# or use the more modern xml2 ---------------------------------------------

library(xml2)
library(rvest)

pg <- read_html(URL)

csv2 <- html_table(html_nodes(pg, "table")[[2]], fill=TRUE)

colnames(csv2) <- csv[4,]
colnames(csv2) <- gsub(" ", "", colnames(csv))

csv2 <- csv2[-c(1:4), -c(12:13)]

dplyr::glimpse(csv2)

## Observations: 52
## Variables: 11
## $ \/ParcelNumber\/ (chr) "173C2 F023 ", "173C2 G009 ", "173C2 G007 ", ...
## $ PropertyType       (chr) "R ", "R ", "R ", "R ", "R ", "R ", "R ", "...
## $ SaleDate           (chr) "12-2015 ", "08-2015 ", "08-2015 ", "07-201...
## $ SalePrice          (chr) "200,000 ", "265,000 ", "210,000 ", "188,00...
## $ HeatedSqFt         (chr) "1,538 ", "1,756 ", "1,415 ", "1,125 ", "1,...
## $ Acres              (chr) "0.30 ", "0.37 ", "0.37 ", "0.38 ", "0.32 "...
## $ SaleQual           (chr) "Q ", "Q ", "Q ", "Q ", "Q ", "Q ", "U ", "...
## $ Reason             (chr) "FM ", "FM ", "FM ", "FM ", "FM ", "FM ", "...
## $ YearBuilt          (chr) "1952 ", "1954 ", "1963 ", "1963 ", "1998 "...
## $ LocationAddress    (chr) "155 HARDIN DR ", "140 HARDIN DR ", "150 HA...
## $ Neighborhood       (chr) "4025R-RIVERDALE ", "4025R-RIVERDALE ", "40...
Sign up to request clarification or add additional context in comments.

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.