*** Graph fonts for on-screen and PostScript output
graph set window fontface "Courier"
graph set ps fontface "Courier"

*** Use microdata appended in separate do file
	*use "$output/DHS/DHSallcountries2.dta", clear
	use "$output/DHSblog.dta", clear

*** Convert Nepalese calendar (roughly)
	* Interview years of 2050 or later are shifted back by 57 years.
	* (Missing years also satisfy >=2050 in Stata, but missing-57 stays missing.)
	replace interviewyear=interviewyear-57 if interviewyear>=2050 

*** Few mistakes in country codes
	*Guatemala code "gu" accidentally as GUM. Change to GTM.
	replace ccode="GTM" if ccode=="GUM"
	*Kyrgyzstan code "ky" accidentally as CYM. Change to KGZ.
	replace ccode="KGZ" if ccode=="CYM"
	* Remaining fixes are keyed on the survey identifier (countryphase).
	replace ccode="MDG" if countryphase=="MD4"
	replace ccode="MDG" if countryphase=="MD5"
	replace ccode="NIC" if countryphase=="NC4"
	replace ccode="NER" if countryphase=="NI5"
	replace ccode="NER" if countryphase=="NI6"

*** Country names
	* Names are derived from ccode only AFTER the corrections above, so that a
	* corrected survey is not left carrying the name of its original, wrong
	* code (which would also pool it with another country wherever the data
	* are grouped by country).
	kountry ccode, from(iso3c) 
	rename NAMES country
	*kountry command mistake:
	* Kept as a safeguard; it is a no-op if no row is named "New Caledonia".
	replace country="Nicaragua" if country=="New Caledonia"
	
*** Sample restriction
	* Keep respondents aged 25 to 34 (inclusive); missing ages are dropped.
	keep if age>=25 & age<=34

*** Define key vars
	* s: single years of schooling; values of 30 or more (and missing) become missing.
	gen s 	= edsingleyrs if edsingleyrs<30
	* literacy: codes of 3 or above are set to missing, then the variable is
	* recoded to a 0/1 indicator that equals 1 only when the original code was 2.
	replace literacy = . if literacy>=3
	replace literacy = literacy==2 if literacy!=.
	* Only administered literacy test for grades 0-2 in BOL and HND, so grade 3+
	* is treated as edlevel 2 there.
	* The !missing(s) guard matters: in Stata a missing value compares as larger
	* than any number, so "s>=3" alone would also recode rows with unknown schooling.
	replace edlevel=2 if s>=3 & !missing(s) & inlist(ccode,"BOL","HND")

*** Simple data table for blog
	* Step 1: count of non-missing literacy observations per country x grade,
	* saved to a tempfile. "drop if s>8" also removes rows with missing s.
	preserve
		drop if s>8
		collapse (count) obs=literacy, by(country s)
		tempfile obs
		save `obs'
	restore

	* Step 2: weighted mean literacy per country x grade among edlevel<2,
	* reshaped to one row per country and written to CSV.
	preserve
		drop if edlevel>=2
		drop if s>8
		collapse (mean) literacy [pw=sampleweight], by(country s)
		mmerge country s using `obs'
		drop _merge
		* Suppress cells with fewer than 50 observations
		drop if obs<50
		* Rename BEFORE the first use of "lit": the original referenced lit one
		* line earlier, which only worked through variable-name abbreviation.
		rename literacy lit
		* Drop cells that are (numerically) exactly 0 or 1; missing means are
		* dropped here too because missing compares as greater than .99999.
		drop if lit<.000001 | lit>.99999
		replace lit = lit*100
		drop obs
		reshape wide lit, i(country) j(s)
		format lit* %4.0f
		* r`p' = lowest grade (0-8) at which literacy reaches p percent;
		* stays missing if that level is never reached.
		forval p=10(10)90{
			gen r`p' = .
			forval g = 0/8{
				replace r`p' = `g' if r`p'==. & lit`g'>=`p' & lit`g'!=.
			}
		}
		* Order countries by those thresholds, ties broken by grade-5 then
		* grade-4 literacy (descending); the helper columns are then removed.
		gsort /*r90 r80 r70 r60*/ r50 r40 r30 r20 r10 -lit5 -lit4
		drop r*
		outsheet using "$figures/blog_list.csv", comma replace
	restore


*******************
*** SCATTERPLOT ***
*******************
	
*** Change in literacy (among non-completers) on change in primary completion

*** Phases, for country panel
	* Drop survey rounds that have no non-missing literacy observations at all.
	by countryphase, sort: egen nlit = count(literacy)
	drop if nlit==0
	* phase = last character of the survey identifier, converted to numeric
	* when destring can do so.
	gen phase = substr(countryphase,-1,.)
	destring phase, replace

*** Collapse for graphs
	* graduates: 1 if edlevel>=2, 0 otherwise, and missing when edlevel is
	* missing. Without the guard a missing edlevel would count as a graduate,
	* because missing compares as larger than any number in Stata.
	gen graduates = edlevel>=2 if !missing(edlevel)
	* literates: literacy indicator, defined only for edlevel<2.
	gen literates = literacy if edlevel<2
	
	preserve
		keep if age>=25 & age<=34
		collapse (mean) graduates literates t=interviewyear [aw=sampleweight], by(ccode phase)
		* Differences between consecutive rounds of the same country. Sorting
		* on (phase) inside the by-group makes the lag order explicit instead of
		* relying on whatever order a sort on ccode alone leaves behind.
		bysort ccode (phase): gen Dlit = literates - literates[_n-1]
		bysort ccode (phase): gen Dgrad = graduates - graduates[_n-1]
		bysort ccode (phase): gen Dt = t - t[_n-1]
		* Annualize by the gap in (mean) interview year between rounds
		replace Dlit = Dlit/Dt
		replace Dgrad = Dgrad/Dt
		global sc = "msymbol(none) mlabel(ccode) mlabpos(0) mlabcol(black)"
		global lc = "lwidth(medthick) lcolor(emerald*.75)"
		* NOTE(review): title() appears twice in the command below ("Annual
		* changes" and "Left behind?"); presumably only the later text is
		* shown - confirm and remove the unused one.
		twoway 	(lfitci Dlit Dgrad)				///
				(lfit Dlit Dgrad, $lc)			///
				(scatter Dlit Dgrad, $sc),		///
				legend(off)						///
				title("Annual changes", 		///
					color(black))				///
				plotregion(style(none) 			///
					lcolor(gs10) 				///
					margin(large))				///				
				graphregion(fcol(white) 		///
					lcol(white) margin(medium))	///
				ysc(lcolor(gs14))				///
				xsc(lcolor(gs14))				///
				ylabel(/*-.02 "-2%"*/ -.01 "-1%"	///
					0 "0%" .01 "1%" .02 "2%"	///
					, notick					///
					angle(0)  glcolor(gs14))	///					
				xlabel(-.01 "-1%" 0 "0%" 		///
					.01 "1%" .02 "2%" .03 "3%",	///
					notick grid glcolor(gs14))	///
				xtitle("Annual change in primary completion", size(small))		///
				ytitle("Change in literacy for non-completers", size(small))	///
				note("Women age 25-34. All changes are annualized between survey rounds.""Each country appears up to 3 times between 2000 & 2015.", span justif(left) astext)	///
				title("Left behind?", yoffset(2) span justif(left) astext)	///
				subtitle("As more women complete primary school, literacy""has risen only slightly for those who don't.", yoffset(2) span justif(left) astext)
						
		* Fix the declared figure size, then export as PDF and PNG
		gr_edit .style.editstyle declared_ysize(6) editcopy
		gr_edit .style.editstyle declared_xsize(7) editcopy
		gr export "$figures/selection_blog.pdf", as(pdf) replace	
		gr export "$figures/selection_blog.png", as(png) width(1000) replace	
	restore
	
* Execution of this do-file stops at the exit below, so nothing after it
* runs unless that line is removed or commented out.
exit

* NOTE(review): the comment terminator on the next line has no matching
* opener in the visible part of this file - presumably left over from
* toggling the section below on and off; confirm before removing.
*/

**********************
*** LEARNING GRAPH ***
**********************

*** Sample restriction
	* Keep at most edlevel 1, except in GHA and YEM where every level is kept
	* (with 2 exceptions, literacy assumed after primary).
	keep if edlevel<=1 | ccode=="GHA" | ccode=="YEM"
	* Grades 0-8 with a non-missing literacy indicator
	keep if s<=8 & literacy!=.

*** Only last round per country
	* encode numbers the survey identifiers in sorted order, so the largest
	* code within a country is the alphabetically last countryphase.
	sort countryphase
	encode countryphase, generate(svycode)
	bysort ccode: egen last = max(svycode)
	drop if svycode != last
	* Only these variables are needed from here on
	keep ccode literacy s sampleweight

		
*** Local polynomials of literacy on grade
	* Evaluation grid: the integer grades 0-8, stored in the first 9 rows.
	* lpoly's default n(9) grid is instead spread evenly between each country's
	* own minimum and maximum of s, so labelling those 9 points as grades 0-8
	* is only right for countries observed over the full 0-8 range. Passing the
	* grid through at() evaluates every country at the same grades.
	* NOTE(review): grades with no nearby data for a country are expected to
	* come back as missing rather than extrapolated - confirm.
	* NOTE(review): sampleweight is kept in the data but not used here - confirm
	* whether the smooth should be weighted.
	capture drop grade
	gen grade = _n - 1 if _n<=9

	levelsof ccode, l(countries)
	foreach c of local countries{
		di "`c'"
		capture lpoly literacy s	///
			if ccode=="`c'",	///
			at(grade) gen(lhat`c')	///
			nograph
		* Still best-effort, but no longer silent: report countries that failed
		if _rc {
			di as error "lpoly failed for `c' (rc=" _rc "); country omitted"
		}
	}

	* Keep only the 9 grid rows, then stack to one row per grade x country
	keep if !missing(grade)
	keep grade lhat*
	reshape long lhat, i(grade) j(ccode) string

/*
	save "$output/temp_lhat", replace
exit
	use "$output/temp_lhat", clear
*/
	
*** Generate labels for histogram
	* bin: fitted literacy rounded to the nearest 0.05. For each grade s,
	* binlab`s' holds the space-separated country codes that fall in the same
	* bin at that grade; it is filled on one row per bin x grade and left
	* empty elsewhere, so each label is drawn once.
	gen bin = round(lhat,0.05)

	* Flag one row per bin x grade. This does not depend on the loops below,
	* so it is computed once instead of re-sorting for every bin.
	tempvar first
	bysort bin grade: gen byte `first' = _n==1

	forval s = 0/8{
		gen binlab`s' = ""
		* Distinct (non-missing) bins at this grade; an if-qualifier replaces
		* the preserve/keep/restore round trip.
		levelsof bin if grade==`s', l(bins`s')
		foreach m of local bins`s'{
			* +/-.01 tolerance instead of equality, since the bin values are
			* floating point.
			* "clean" returns the codes without compound quotes, separated by spaces
			levelsof ccode if grade==`s' & bin>`m'-.01 & bin<`m'+.01, l(c`s') clean
			* The leading space is kept so the label text matches the original
			replace binlab`s' = " `c`s''" if grade==`s' & bin>`m'-.01 & bin<`m'+.01 & `first'
		}
	}
	drop `first'
	

*** GRAPH
	* One figure per value of s: a line per country for grades 0..s, with
	* the country-code labels for grade s drawn to the right of the lines.
	global lc = "lwidth(medthick) lcolor(emerald*.75)"
	*forval s = 0/8{
	forval s = 8/8{
	preserve
		keep if grade<=`s'
		* Start each figure from an empty plot list. Without this reset the
		* macro would carry over the previous iteration's lines whenever the
		* loop runs over more than one value of s (see the 0/8 variant above).
		local line
		levelsof ccode, l(countries)
		foreach c of local countries{
			local line `line' (line lhat grade if ccode=="`c'", $lc)
		}
		* xsc(range(12.5)) extends the x axis beyond grade 8 - presumably to
		* leave room for the labels.
		twoway `line'							///
			(scatter bin grade 					///
					if grade==`s', 				///
					msymbol(none)				///
					mlab(binlab`s') 			///
					mlabpos(3) mlabcol(black)),	///
			legend(off)							///
			plotregion(style(none) 				///
				lcolor(none) margin(medium))	///				
			graphregion(fcol(white) 			///
				lcol(white))					///
			xlabel(0 1 2 3 4 5 6 7 8, notick)	///
			ylabel(	0 "0%" .2 "20%" .4 "40%"	///
					.6 "60%" .8 "80%" 1 "100%", ///
					angle(0) nogrid notick)		///					
			xtitle("Highest grade attained", 	///
					align(west))				///
			ytitle("Literacy rate") ysc(noline)	///
			xsc(range(12.5) noline)				///
			title("Where does schooling produce literacy?",	///
					justif(left))				///				
			subtitle("Women age 25-34 by highest grade attained",	///
					justif(left))				///	
			note(	"Source: Authors' calculations based on 50+ rounds of DHS microdata."	///
					"Lines show a local polynomial regression of a binary literacy indicator on"	///
					"highest grade attained.")
			
		* Post-hoc layout edits, then export as PNG and PDF
		gr_edit .xaxis1.title.style.editstyle box_alignment(west) editcopy
		gr_edit .style.editstyle declared_ysize(5) editcopy
		gr_edit .style.editstyle declared_xsize(9) editcopy
		gr_edit .title.as_textbox.setstyle, style(yes)
		gr_edit .subtitle.as_textbox.setstyle, style(yes)
		gr export "$figures/lit`s'_blog.png", as(png) width(1000) replace	
		gr export "$figures/lit`s'_blog.pdf", as(pdf) replace	
		
	restore
	}
	
exit