Topics covered in this homework include:
From your exam: “The data frame Robey in the car
package has data on fertility and contraceptive use in 50 developing countries around 1990. There are three variables: (1) region, a factor with four levels (Africa, Asia, Latin America, and Near East); (2) tfr, the total fertility rate, the average number of children that a woman would have over her lifetime at current rates (usually abbreviated TFR); and (3) contraceptors, the percentage of contraceptive use among women of childbearing age.”
library(car)
library(dplyr)
##
## Attaching package: 'dplyr'
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
# Proportion of countries in each region where contraceptive use exceeds 50%
data.to.plot <- Robey %>%
  group_by(region) %>%
  summarise(prop.revolution = mean(contraceptors > 50))

# First pass: one bar per region, regions in their default order
p <- ggplot(
  data = data.to.plot,
  mapping = aes(x = region, y = prop.revolution)
)
p + geom_bar(stat = "identity")
# Now let's improve labels and order (alphabetical order is not a good order):
# re-level the regions by their proportion so the bars are sorted by height.
# to learn more about reordering factors see: https://kohske.wordpress.com/2010/12/29/faq-how-to-order-the-factor-variables-in-ggplot2/
data.to.plot$region <- with(data.to.plot, reorder(region, prop.revolution))

p <- ggplot(
  data = data.to.plot,
  mapping = aes(x = region, y = prop.revolution)
)
p +
  geom_bar(stat = "identity") +
  labs(x = "Region", y = "Proportion with > 50% contraceptive use")
# Mean total fertility rate (tfr) within each region
data.to.plot <- Robey %>%
  group_by(region) %>%
  summarise(mean.tfr = mean(tfr))

# First pass: one point per region, regions in their default order
p <- ggplot(data = data.to.plot, mapping = aes(x = region, y = mean.tfr))
p + geom_point()
# Now let's improve labels and order (alphabetical order is not a good order):
# re-level the regions by mean TFR and put them on the y axis (dot plot).
# to learn more about reordering factors see: https://kohske.wordpress.com/2010/12/29/faq-how-to-order-the-factor-variables-in-ggplot2/
data.to.plot$region <- with(data.to.plot, reorder(region, mean.tfr))

p <- ggplot(data = data.to.plot, mapping = aes(x = mean.tfr, y = region))
p +
  geom_point() +
  labs(x = "Mean Total Fertility Rate", y = "Region")
# TFR against contraceptive use with a fitted linear trend: first pooled over
# all countries, then split into one panel per region. Both plots share the
# same base object, so it is built only once.
p <- ggplot(data = Robey, mapping = aes(x = contraceptors, y = tfr))
p + geom_point() + stat_smooth(method = "lm")
p + geom_point() + stat_smooth(method = "lm") + facet_grid(. ~ region)
There is a negative relationship between contraceptive use and TFR in all four regions in this data.
You can read about it and see all of his code here: https://rpubs.com/bradleyboehmke/median_income_trends
The starter code below gets you to where Brad Boehmke’s code begins.
## Loading required package: rJava
## Loading required package: xlsxjars
## State X2013 X2012 X2011 X2010 X2009 X2008 X2007 X2006 X2005
## 1 United States 51939 51758 51842 52646 54059 54423 56436 55689 55278
## 2 Alabama 41381 44096 44112 43733 43420 48119 47424 43848 44329
## 3 Alaska 61137 64573 59483 61804 66904 69230 70771 65183 66691
## 4 Arizona 50602 47728 50358 50103 49674 50757 53045 53905 53988
## 5 Arkansas 39919 39585 42778 41226 39681 42828 45832 42814 43742
## 6 California 57528 57849 55274 57996 60963 61684 62616 63913 61756
## 7 Colorado 63371 58087 60724 64353 60742 65935 68691 64350 60197
## 8 Connecticut 67781 65181 67752 70512 70430 69980 72061 72099 67817
## 9 Delaware 52219 49684 56613 58990 56597 54855 61330 60584 61135
## 10 D.C. 60675 66194 57225 60822 57713 60143 57054 56008 53687
## 11 Florida 47886 46740 46716 47080 49557 48531 51449 52772 51297
## 12 Georgia 47439 48820 47615 47134 47069 50013 54647 57010 54800
## 13 Hawaii 61408 57081 61156 63611 60437 66560 71928 69864 71100
## 14 Idaho 51767 48618 49154 50268 50802 51304 55257 53392 52712
## 15 Illinois 57196 52490 52446 54198 57418 57616 58989 56232 57750
## 16 Indiana 50553 46829 46033 49295 48117 50330 53313 52461 50637
## 17 Iowa 54855 54219 52013 52368 55085 54249 54947 55603 55485
## 18 Kansas 51485 50730 47796 49204 48564 51799 54485 52629 50148
## 19 Kentucky 42158 41683 41280 43915 46334 44518 44324 45619 43790
## 20 Louisiana 39622 39653 42111 41988 49342 42804 46414 42157 44431
## 21 Maine 50121 49872 51468 51209 51589 51096 53808 52733 52410
## 22 Maryland 65262 72880 71337 68592 69708 68930 73734 73559 72205
## 23 Massachusetts 62963 64581 65575 65102 64481 65261 65682 63926 66841
## 24 Michigan 48801 50742 50625 49441 49951 53866 55466 56204 54809
## 25 Minnesota 60907 62693 59886 55899 60916 59424 65227 64944 64691
## 26 Mississippi 40850 37173 42558 40770 38096 39431 41882 40129 39228
## 27 Missouri 50311 50487 47409 48951 52965 49809 51686 51504 51292
## 28 Montana 44132 45743 41716 44103 43916 46414 49046 47491 44523
## 29 Nebraska 53774 52954 57603 56095 53862 54883 55246 55624 57183
## 30 Nevada 45369 48021 48724 54702 55859 59228 60733 60404 57525
## 31 New Hampshire 71322 68805 68234 71190 69648 71596 75920 71597 67995
## 32 New Jersey 61782 67661 64565 67275 70350 70655 67980 78632 75613
## 33 New Mexico 42127 44055 43482 48221 47288 45551 49833 46246 46473
## 34 New York 53843 48373 52445 53186 54536 54594 54988 55713 56292
## 35 North Carolina 41208 42157 46821 46828 45511 46446 48886 45980 50183
## 36 North Dakota 52888 56576 58375 54495 54383 53696 53034 47424 50345
## 37 Ohio 46398 45020 46243 49024 49826 50778 55162 53031 52744
## 38 Oklahoma 43777 49110 50186 46051 49825 49888 48552 44872 44919
## 39 Oregon 56307 52527 53367 54063 53322 55964 56439 54407 52692
## 40 Pennsylvania 53952 52658 51693 51618 52316 55612 54418 56008 55247
## 41 Rhode Island 57812 56880 50785 55154 56076 57602 60904 62084 59046
## 42 South Carolina 43749 45046 41516 44550 44637 45608 49672 45772 48004
## 43 South Dakota 54453 50133 48910 48454 49768 55827 52150 52484 51489
## 44 Tennessee 42499 43620 43789 41230 44003 42954 46282 47015 47021
## 45 Texas 53027 52681 50799 50499 51559 50298 51740 50035 49426
## 46 Utah 62967 59189 57475 60579 63523 67659 60139 63115 65405
## 47 Vermont 54842 56390 53715 59753 56819 54859 53242 60056 60502
## 48 Virginia 67620 65571 64853 64496 65706 67062 66466 65993 61946
## 49 Washington 60106 63091 58881 60004 65588 61270 65252 63224 60432
## 50 West Virginia 40241 44186 43315 45703 43973 41106 47288 44388 43487
## 51 Wisconsin 55258 53850 53918 53795 55645 55394 57609 59723 53278
## 52 Wyoming 55700 58348 56456 55771 56984 57706 54763 54349 53359
## X2004 X2003 X2002 X2001 X2000 X1999 X1998 X1997 X1996 X1995 X1994 X1993
## 1 54674 54865 54913 55562 56800 56895 55497 53551 52471 51719 50148 49594
## 2 45172 47186 48690 46262 47918 50680 51759 46220 44798 39448 42271 39817
## 3 67906 65655 68335 75476 71487 71854 72348 69453 78028 72782 70514 68152
## 4 54073 52140 51450 56189 53815 51720 52935 47379 46772 46842 48639 48434
## 5 43144 40533 41936 43866 40171 41497 39484 37860 40098 39179 39736 36574
## 6 60702 62442 61424 62186 63328 60995 58421 57442 57379 56170 54915 54090
## 7 62755 63252 62534 64995 65255 67353 66506 62563 60540 61781 58804 54749
## 8 67951 69617 69128 70192 67868 70731 66376 63652 62268 61079 63877 62731
## 9 59256 62086 64289 65265 68129 65188 59169 62274 58114 53012 55757 57251
## 10 53585 57051 50590 54169 55761 54062 47716 46105 47258 46668 46809 43344
## 11 49989 49361 49235 47922 52561 50093 49822 46966 45299 45145 45531 45322
## 12 50543 53751 55600 56020 56680 55118 55183 53056 48042 51754 48909 50264
## 13 69360 65651 61250 62419 69727 62218 58268 59237 61755 65037 65677 67725
## 14 54704 53667 48835 50316 50877 50050 52350 48340 51314 49594 49016 49227
## 15 56824 57189 55303 60750 62311 64771 61624 59742 58476 57782 54526 52160
## 16 52202 53734 53150 53129 55278 57093 56704 56277 51961 50670 43300 46791
## 17 53511 52416 53152 53915 55449 57457 52834 48888 49096 53909 51414 45502
## 18 50644 56023 55185 54493 55541 52214 52394 52778 48173 46050 44021 47259
## 19 43916 46782 47601 50574 49056 47167 51739 48409 47919 45244 41336 38696
## 20 44926 42439 44035 43844 41553 45652 45292 48131 44739 42419 39908 41770
## 21 50968 47006 47719 48173 50410 54331 50865 47425 51294 51388 47120 43557
## 22 70422 66259 73039 70433 73770 72985 71383 67559 65039 62290 60925 63402
## 23 64152 64538 64555 68753 63243 61521 60435 60812 58388 58546 62949 58838
## 24 52112 57023 55310 59271 61564 64434 59687 56064 57990 55285 54842 51850
## 25 69190 66904 70727 69316 73386 65761 68400 61595 60601 57573 52293 53469
## 26 42861 41452 39988 39685 46397 45405 41560 41242 39439 40278 39479 35228
## 27 51965 55428 55389 54393 61003 57855 57375 52897 50657 52856 46924 45532
## 28 41876 43200 45106 42270 44338 43392 45067 42273 42406 42128 42947 42020
## 29 53999 55696 55415 57382 56476 54001 51969 50204 50286 49978 49417 49224
## 30 58214 57229 58214 59740 61897 57964 56740 56227 56977 54766 55754 56854
## 31 70066 70379 71633 67540 68888 64387 64164 59329 58259 59452 54781 60267
## 32 68167 70985 70658 68119 68183 69530 71112 69492 70176 66665 65716 64293
## 33 48789 44463 45912 43584 47471 45540 45018 43538 37087 39448 41818 42478
## 34 55063 54194 54340 55412 55115 55906 53369 51804 52350 50128 49580 50318
## 35 49623 47216 47282 50212 51832 52083 51148 51865 52632 48536 46806 45751
## 36 48368 51182 46874 47095 48692 45664 43250 45817 46525 44150 43952 44637
## 37 53097 55121 55269 54979 58115 55207 55554 52290 50369 53032 49512 49664
## 38 48853 45472 47208 46853 43871 45692 48135 45369 40563 39933 41952 41687
## 39 50555 52737 54127 54306 57489 56787 55756 53901 52471 55207 48892 52606
## 40 54393 54378 55029 57235 57052 52787 55682 54292 51594 52399 49840 49204
## 41 59115 56630 54924 60161 57080 59723 58067 50356 54680 53666 49625 53195
## 42 47715 48736 48961 49652 50821 50975 47479 49581 51248 44122 46389 41358
## 43 50695 50057 49040 52198 49340 50089 46792 42971 43651 44892 46214 44032
## 44 46952 47525 47948 47082 46122 51059 48655 44334 45520 44037 44513 39849
## 45 51052 49739 51987 53762 52227 54087 51070 50758 48893 48627 47802 45603
## 46 62736 62410 61973 62291 64321 64380 63224 61901 54757 55367 55513 56809
## 47 58368 54793 55677 53675 53559 58136 56192 50726 47838 51336 55647 49315
## 48 63069 69386 64265 66106 63798 63881 61875 62164 57969 54976 58514 57836
## 49 61566 60172 58505 55907 57524 63573 67679 64487 54222 53983 52120 56601
## 50 41157 41497 38016 39043 39785 40958 38112 39779 37325 37762 36625 35593
## 51 56398 58603 59438 59665 60991 63844 58982 57299 59137 62159 55003 50428
## 52 55985 53899 51487 52261 53606 52074 50309 48367 45761 47853 51509 46738
## X1992 X1991 X1990 X1989 X1988 X1987 X1986 X1985 X1984
## 1 49836 50249 51735 52432 51514 51121 50488 48761 47866
## 2 41982 40608 40356 38607 37745 38710 38797 37850 36965
## 3 68000 67739 67898 65311 62636 65189 63586 71810 69095
## 4 47757 51268 50493 51790 50019 52470 51710 49296 45752
## 5 38849 39089 39369 38877 38168 36931 37982 36029 33471
## 6 56778 56150 57518 59875 57307 59134 58828 55704 53999
## 7 52842 52539 53100 48623 49601 51935 55142 58184 55097
## 8 66437 70311 67159 76766 68520 64462 66354 64188 63959
## 9 58038 54351 53222 58168 57720 57365 51966 47444 55135
## 10 49204 49847 47327 48525 50598 53855 49322 43513 43580
## 11 44489 45455 46106 47315 48072 48037 46335 44064 42250
## 12 46845 45389 47619 49958 50267 52402 49419 43457 42675
## 13 68506 62125 67247 63550 62486 68699 58814 59792 61666
## 14 45067 43561 43721 44720 44371 40713 42076 42863 45041
## 15 51325 53181 56225 56775 55864 53128 53761 51346 50721
## 16 46410 45184 46526 46976 49750 44173 46089 46814 48624
## 17 46757 47625 47148 47642 45989 43528 45544 43205 42417
## 18 49365 48863 51690 48725 48375 50183 48519 47048 52594
## 19 38204 39638 42814 42233 37667 40552 40302 35843 37755
## 20 41382 42198 38711 41467 38783 41878 42362 43726 40465
## 21 48179 46483 47452 51190 49956 46293 47501 42363 44093
## 22 60519 61635 67136 65329 69162 68597 62061 62218 63440
## 23 59146 59570 62627 65456 62844 63243 61523 58235 57570
## 24 52489 53570 51724 55823 55765 54340 53951 50049 49041
## 25 50398 49170 54365 54752 55037 55085 53623 49252 52182
## 26 33462 32484 34863 36127 34373 36315 33486 33886 32950
## 27 44509 46580 47224 48063 44358 46529 44461 45295 44364
## 28 43149 41411 40387 42975 42064 40161 41222 41779 41718
## 29 48880 49287 47483 47740 47605 45642 44151 45006 45692
## 30 51906 54938 55329 53220 52948 52723 53164 48051 55044
## 31 64151 60100 70502 68079 65516 63434 61947 54511 55338
## 32 63442 66800 66924 70959 68660 67167 64314 63961 59314
## 33 42067 44268 43262 40998 36511 40719 40243 42165 44054
## 34 50511 53031 54582 57130 54711 51754 50747 48804 47038
## 35 45176 44790 45491 47898 46197 44646 44331 44287 43924
## 36 43855 43187 43651 45763 45586 44285 43615 43779 44356
## 37 51086 49689 51856 52641 52488 50556 50930 51974 49378
## 38 41130 42470 42130 42929 44781 42549 42480 43779 45161
## 39 51936 50356 50591 51749 52503 49114 50236 45202 45697
## 40 48610 50651 50114 52041 50600 49871 48277 47231 43448
## 41 49504 51433 55234 54642 56465 55497 53819 50840 46151
## 42 44862 45807 49648 43167 48312 49136 44548 41366 43369
## 43 42716 41097 42453 43729 42184 41489 40350 37456 41447
## 44 39559 40787 39034 41014 39463 41544 37021 36704 35837
## 45 45472 46258 48772 46954 47234 48492 48997 49019 49167
## 46 55717 46730 52079 55717 49788 52039 53294 52106 49237
## 47 53283 48630 53730 56766 54850 49854 49883 53679 48214
## 48 62138 60275 60598 61886 61775 58840 60258 58694 56643
## 49 55146 56661 55482 57974 61167 53588 54511 49550 53423
## 50 32975 38608 38248 39320 36619 33753 33387 32998 35967
## 51 54183 51929 53062 52826 55960 51725 53596 47993 44296
## 52 49142 48454 50900 53548 49989 54120 47774 45588 50858
A few notes:
In the past we have recommended using tbl_df(),
but we don’t recommend it here because it conflicts with the way some of Boehmke’s code is written. If you want to use tbl_df(), you will need to rewrite some of Boehmke’s code.
In step 7 of the code, you should do as much as possible with ggplot2. Do not use Illustrator or any other program; we want to avoid anything that is not automated.
# Clean the overall U.S. row and turn it into long format: one row per year
# with the columns State, Year (still character at this point) and Income.
us <- data %>%
  filter(State == "United States") %>%
  gather(Year, Income, X2013:X1984) %>%
  separate(Year, c("left", "Year"), sep = "X") %>%
  select(-left) %>%
  arrange(Year)

# Extract the 1984 value as the baseline value and add it to the us data frame.
# The income is looked up by column name rather than by position, so the
# result is a plain numeric scalar regardless of column order and regardless
# of whether `us` is a data.frame or a tbl_df (for which `us[rows, 3]` would
# return a one-cell table instead of a number).
us_base <- us$Income[us$Year == "1984"]
us$us_baseline <- us_base

# Calculate the percent change in U.S. median income for each year as compared
# to 1984 (aka the baseline)
us <- us %>%
  mutate(us_change = (Income - us_baseline) / us_baseline)
# Create a states data frame (every row except the national one), turn it
# into long format and drop state-years that have no income value.
states <- data %>%
  filter(State != "United States") %>%
  gather(Year, Income, X2013:X1984) %>%
  separate(Year, c("left", "Year"), sep = "X") %>%
  select(-left) %>%
  arrange(Year) %>%
  # is.na() is the proper missing-value test; comparing a numeric column with
  # the string "NA" only worked because filter() drops rows whose condition
  # evaluates to NA.
  filter(!is.na(Income))

# Create baselines for each state: its 1984 income
state_base <- states %>%
  filter(Year == "1984") %>%
  select(State, State_Baseline = Income)

# Add the baseline to the states and calculate the percent change in median
# income for each state as compared to 1984 (aka the baseline). The join key
# is stated explicitly so the join does not depend on which column names the
# two tables happen to share.
states <- states %>%
  left_join(state_base, by = "State") %>%
  arrange(State) %>%
  mutate(state_change = (Income - State_Baseline) / State_Baseline)

# Change year variables from character to numeric
states$Year <- as.numeric(states$Year)
us$Year <- as.numeric(us$Year)
# Get the top 5 and bottom 5 states by 2013 percent change, which will allow
# me to identify them graphically.
# The cut-off for the bottom group is derived from the number of rows present
# (n() - 5) instead of a hard-coded 46, and seq_len() is safe when no rows
# match, so the selection stays correct if the set of states changes.
# NOTE: the object name `rank` shadows base::rank() in the workspace; it is
# kept because the plotting code that follows refers to `rank` by name.
rank <- states %>%
  filter(Year == 2013) %>%
  arrange(desc(state_change)) %>%
  mutate(rank = seq_len(n())) %>%
  filter(rank <= 5 | rank > n() - 5)
### Step 1
# Background layer: one pale grey line per state on a stripped-down theme
# (no borders, ticks or axis titles; only dotted horizontal grid lines).
p <- ggplot(states, aes(x = Year, y = state_change, group = State)) +
  geom_line(colour = "grey90", alpha = 0.9) +
  theme_bw() +
  theme(
    plot.background = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    panel.grid.minor = element_blank(),
    panel.grid.major.x = element_blank(),
    panel.grid.major.y = element_line(linetype = 3, colour = "grey50"),
    axis.ticks = element_blank(),
    axis.title = element_blank()
  )
print(p)
### Step 2
# Overlay the national trend. group = 1 replaces the per-State grouping
# inherited from the base plot; linetype 5 is R's "longdash".
p <- p + geom_line(
  data = us,
  mapping = aes(x = Year, y = us_change, group = 1),
  linetype = 5
)
print(p)
### Step 3
# Highlight Ohio with a thicker blue line drawn on top of the grey state lines
p <- p + geom_line(
  data = states %>% filter(State == "Ohio"),
  mapping = aes(x = Year, y = state_change, group = State),
  colour = "dodgerblue",
  size = 1
)
print(p)
### Step 4
# Redraw D.C. and Nevada in a darker grey than the other states so that they
# stand out from the background lines
p <- p +
  geom_line(
    data = states %>% filter(State == "D.C."),
    mapping = aes(x = Year, y = state_change, group = State),
    colour = "grey70"
  ) +
  geom_line(
    data = states %>% filter(State == "Nevada"),
    mapping = aes(x = Year, y = state_change, group = State),
    colour = "grey70"
  )
print(p)
### Step 5
# Mark the line ends in 2013: open circles (shape 21) for the states kept in
# `rank`, and a larger solid point for the U.S. as a whole
p <- p +
  geom_point(
    data = rank,
    mapping = aes(x = Year, y = state_change),
    shape = 21,
    size = 1.5,
    alpha = 0.6
  ) +
  geom_point(
    data = us %>% filter(Year == 2013),
    mapping = aes(x = Year, y = us_change),
    size = 2.5,
    alpha = 0.6
  )
print(p)
### Step 6
# Fix the axis ranges and tick positions, and format the y axis as percentages.
# The argument is spelled `labels` in full: the original `label` only worked
# through partial argument matching, which is fragile. percent() is referenced
# as scales::percent so this step does not depend on scales being attached.
p <- p +
  scale_y_continuous(
    limits = c(-0.2, 0.55),
    breaks = seq(-0.2, 0.4, by = 0.2),
    labels = scales::percent
  ) +
  scale_x_continuous(
    limits = c(1983, 2013),
    breaks = seq(1985, 2010, by = 5),
    expand = c(0, 0.25)
  )
print(p)
### Step 7
# Note: do as many of these annotations as possible in ggplot2; do not use Illustrator
# [ your code here ]
## R version 3.1.2 (2014-10-31)
## Platform: x86_64-apple-darwin13.4.0 (64-bit)
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] grid stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] xlsx_0.5.7 xlsxjars_0.6.1 rJava_0.9-6 scales_0.2.4
## [5] tidyr_0.2.0 ggplot2_1.0.0 dplyr_0.4.1 car_2.0-24
##
## loaded via a namespace (and not attached):
## [1] assertthat_0.1 colorspace_1.2-4 DBI_0.3.1 digest_0.6.8
## [5] evaluate_0.5.5 formatR_1.0 gtable_0.1.2 htmltools_0.2.6
## [9] knitr_1.9 labeling_0.3 lattice_0.20-29 lazyeval_0.1.10
## [13] lme4_1.1-7 magrittr_1.5 MASS_7.3-35 Matrix_1.1-4
## [17] mgcv_1.8-3 minqa_1.2.4 munsell_0.4.2 nlme_3.1-118
## [21] nloptr_1.0.4 nnet_7.3-8 parallel_3.1.2 pbkrtest_0.4-2
## [25] plyr_1.8.1 proto_0.3-10 quantreg_5.11 Rcpp_0.11.4
## [29] reshape2_1.4.1 rmarkdown_0.5.1 SparseM_1.6 splines_3.1.2
## [33] stringi_0.4-1 stringr_0.6.2 tools_3.1.2 yaml_2.1.13