* Analysis was performed using Stata 16.0, 2020-10-10

* Table 2 
* Hazard ratios and 95% confidence intervals of breast cancer in relation to sense of coherence measured at baseline 

* ---- Table 2 set-up: cohort, follow-up window, survival-time declaration ----

* Work relative to the project root and read the analysis cohort.
cd "H:\study1_SOC_BC"
use Data\dta\cohort, clear

* Do not pause long output.
set more off

* End of follow-up = earliest of first breast cancer diagnosis, death,
* emigration, or the administrative cut-off date 2017-10-06.
* min() ignores missing arguments, so the cut-off always bounds the result.
local admin_end = date("20171006","YMD")
gen exit = min(bc_1stdiagdate, deathdate, new_emigration_date, `admin_end')

* Event flag: 1 when follow-up ends on the diagnosis date, missing otherwise.
gen bc_status = 1 if bc_1stdiagdate == exit

* Declare survival-time data.
* OUTCOME: first breast cancer after study entry.
* Time zero and start of risk are both the study entry date; analysis time is
* divided by 365.25.
stset exit, id(studieid) failure(bc_status==1) ///
    origin(time studyentry_date) entry(time studyentry_date) ///
    exit(time exit) scale(365.25)

* Results matrix (4 x 9), pre-filled with missing values:
*   rows 1-3 = exposure levels 0/1/2, row 4 = continuous exposure
*   cols 1-3 = events, rate, person-time
*   cols 4-6 = Model 1 HR and CI limits, cols 7-9 = Model 2 HR and CI limits
matrix A = J(4, 9, .)

* Recode exposure from 1/2/3 to 0/1/2; any other value stays missing.
gen exposure = soc_1_cat - 1 if inlist(soc_1_cat, 1, 2, 3)


* Events, incidence rate (per 1000 units of analysis time) and person-time.
* Rows 1-3 of A hold exposure levels 0, 1, 2; row 4 holds the overall figures.
forvalues lvl = 0/2 {
    local row = `lvl' + 1
    stptime if exposure == `lvl', per(1000)
    matrix A[`row', 1] = r(failures)
    matrix A[`row', 2] = r(rate)
    matrix A[`row', 3] = r(ptime)
}

* Overall row: no restriction is applied here.
* NOTE(review): subjects with missing exposure, if any, are counted in this
* row but in none of rows 1-3 - confirm that this is intended.
stptime, per(1000)
matrix A[4, 1] = r(failures)
matrix A[4, 2] = r(rate)
matrix A[4, 3] = r(ptime)

 
* Model 1: unadjusted hazard ratios (columns 4-6 of A).
* Confidence limits are exp(b -/+ 1.96 * se).
tempname lnhr selnhr

* Categorical exposure, level 0 as reference.
stcox ib0.exposure, nolog

* Reference row: HR fixed at 1, confidence limits left missing.
matrix A[1, 4] = 1.0

* Exposure levels 1 and 2 go to rows 2 and 3.
forvalues lvl = 1/2 {
    local row = `lvl' + 1
    scalar `lnhr'   = _b[`lvl'.exposure]
    scalar `selnhr' = _se[`lvl'.exposure]
    matrix A[`row', 4] = exp(`lnhr')
    matrix A[`row', 5] = exp(`lnhr' - 1.96 * `selnhr')
    matrix A[`row', 6] = exp(`lnhr' + 1.96 * `selnhr')
}

* Continuous exposure (zsoc_1) goes to row 4.
stcox zsoc_1, nolog

scalar `lnhr'   = _b[zsoc_1]
scalar `selnhr' = _se[zsoc_1]
matrix A[4, 4] = exp(`lnhr')
matrix A[4, 5] = exp(`lnhr' - 1.96 * `selnhr')
matrix A[4, 6] = exp(`lnhr' + 1.96 * `selnhr')


* Model 2: adjusted for sociodemographic + BC risk factors (columns 7-9 of A).
*
* The adjustment set is defined once and used by both the categorical and the
* continuous model, so the two models cannot drift apart when a covariate is
* added or removed. The set is the same as before; only the order in which the
* continuous model lists the covariates changes, which does not affect the
* fitted coefficients.
local adjusters age ib2.bmi_cat ib1.european_ancestry_iso ib3.education_level ///
    ib0.smoking_status ib1.alcohol_gram_week_cat ib1.met_cat ///
    ib3.menarche_age_cat ib2.age_firstbirth_cat ib2.preg_times_cat ///
    ib2.birth_times_cat ib1.contraception_ever ib0.hrt_ever ib1.postmenopausal ///
    ib0.bc_1st_relative ib0.benign_breastdisorder ib0.othercancer_malign ///
    ib1.stratus_pd_cat

* Categorical exposure, level 0 as reference.
stcox ib0.exposure `adjusters', nolog

* Reference row: HR fixed at 1, confidence limits left missing.
matrix A[1, 7] = 1.0

* Exposure levels 1 and 2 go to rows 2 and 3; limits are exp(b -/+ 1.96 * se).
forvalues lvl = 1/2 {
    local row = `lvl' + 1
    matrix A[`row', 7] = exp(_b[`lvl'.exposure])
    matrix A[`row', 8] = exp(_b[`lvl'.exposure] - 1.96 * _se[`lvl'.exposure])
    matrix A[`row', 9] = exp(_b[`lvl'.exposure] + 1.96 * _se[`lvl'.exposure])
}

* Continuous exposure (zsoc_1) goes to row 4.
stcox zsoc_1 `adjusters', nolog

matrix A[4, 7] = exp(_b[zsoc_1])
matrix A[4, 8] = exp(_b[zsoc_1] - 1.96 * _se[zsoc_1])
matrix A[4, 9] = exp(_b[zsoc_1] + 1.96 * _se[zsoc_1])


 
 
* Label the results matrix first so that the listing in the log is readable.
* Column names become the variable names of the saved dataset via names(col).
matrix rownames A = weak moderate strong continuous
matrix colnames A = event IR pyr hr_m1 lci_m1 uci_m1 hr_m2 lci_m2 uci_m2

matrix list A

* Save the matrix as a dataset; the row names go into exposure_level.
xsvmat A, rownames(exposure_level) names(col) saving(Output\step2\Table2_1stsoc_raw, replace)

* Format the raw results into display strings for Table 2.
use Output\step2\Table2_1stsoc_raw, clear

* Events and incidence rate, shown as "events (rate)".
* The count is printed in full with a fixed format and thousands separators;
* a g-format restricted to two digits can switch to scientific notation for
* counts of 100 or more.
gen eventIR = string(event, "%9.0fc") + " (" + string(IR, "%9.1f") + ")"

* Person-time divided by 1000, rounded to a whole number.
gen Pyrs = string(pyr/1000, "%9.0f")

* Hazard ratios: "1.00" for the reference row, otherwise "HR (lower-upper)".
foreach m in m1 m2 {
    gen HR_`m' = "1.00" if hr_`m' == 1
    replace HR_`m' = string(hr_`m', "%9.2f") + " (" + string(lci_`m', "%9.2f") + "-" + string(uci_`m', "%9.2f") + ")" if hr_`m' != 1 & hr_`m' != .
}

* Keep only the display columns. Pyrs is written out in full so the command
* does not depend on variable-name abbreviation being switched on.
keep exposure_level eventIR Pyrs HR_m1 HR_m2
order exposure_level Pyrs eventIR HR_m1 HR_m2

* Save output as a Stata dataset and as an Excel sheet.
save Output\step2\Table2_1stsoc, replace
export excel Output\step2\Table2_1stsoc.xlsx, firstrow(var) replace






* Figure 2.  Time-dependent effect of sense of coherence on incident breast cancer 

* ---- Figure 2 set-up: reload the cohort and rebuild the survival-time data ----

* Work relative to the project root and read the analysis cohort.
cd "H:\study1_SOC_BC"
use Data\dta\cohort, clear

* End of follow-up = earliest of first breast cancer diagnosis, death,
* emigration, or the administrative cut-off date 2017-10-06.
* min() ignores missing arguments, so the cut-off always bounds the result.
local admin_end = date("20171006","YMD")
gen exit = min(bc_1stdiagdate, deathdate, new_emigration_date, `admin_end')

* Event flag: 1 when follow-up ends on the diagnosis date, missing otherwise.
gen bc_status = 1 if bc_1stdiagdate == exit

* Declare survival-time data.
* OUTCOME: first breast cancer after study entry.
* Time zero and start of risk are both the study entry date; analysis time is
* divided by 365.25.
stset exit, id(studieid) failure(bc_status==1) ///
    origin(time studyentry_date) entry(time studyentry_date) ///
    exit(time exit) scale(365.25)

* Recode exposure from 1/2/3 to 0/1/2; any other value stays missing.
gen exposure = soc_1_cat - 1 if inlist(soc_1_cat, 1, 2, 3)

* One indicator variable per exposure level:
* exposure1 (weak), exposure2 (moderate), exposure3 (strong).
tabulate exposure, generate(exposure)
 


* Model 2: adjusted for sociodemographic + BC risk factors,
* without the proportional hazards assumption for the exposure.
* Flexible parametric model (stpm2) in which the two exposure indicators are
* entered as time-varying effects through tvc().
local adjusters age ib2.bmi_cat ib1.european_ancestry_iso ib3.education_level ///
    ib0.smoking_status ib1.alcohol_gram_week_cat ib1.met_cat ///
    ib3.menarche_age_cat ib2.age_firstbirth_cat ib2.preg_times_cat ///
    ib2.birth_times_cat ib1.contraception_ever ib0.hrt_ever ib1.postmenopausal ///
    ib0.bc_1st_relative ib0.benign_breastdisorder ib0.othercancer_malign ///
    ib1.stratus_pd_cat

stpm2 exposure2 exposure3 `adjusters', ///
    df(3) scale(hazard) eform ///
    tvc(exposure2 exposure3) dftvc(2)

* Time-dependent hazard ratios with confidence limits.
* The ci option adds the _lci and _uci variables used for plotting.
foreach lvl in 2 3 {
    predict hr`lvl', hrnumerator(exposure`lvl' 1) ci
}



* One panel per exposure contrast: shaded confidence band with the hazard
* ratio curve on top, plotted against analysis time (_t).
* Level 2 = moderate SoC, level 3 = strong SoC; the comparison is weak SoC.
foreach lvl in 2 3 {
    if `lvl' == 2 {
        local panel_title "Moderate vs. weak SoC"
    }
    else {
        local panel_title "Strong vs. weak SoC"
    }

    twoway (rarea hr`lvl'_lci hr`lvl'_uci _t, sort pstyle(ci)) ///
        (line hr`lvl' _t, sort lcolor(black)) ///
        , legend(off) ylabel(1 2, gstyle(dot)) ///
        xtitle("Time since entry (years)") ///
        ytitle("Hazard ratio") ///
        graphregion(color(white)) ///
        title("`panel_title'", color(black)) ///
        name("hr`lvl'_tvc_ci", replace)
}

* Stack the two panels in one column and export the combined figure.
graph combine hr2_tvc_ci hr3_tvc_ci, col(1) graphregion(color(white)) name(hr_tvc_combine,replace)

* The path is relative to the working directory set by cd, like every other
* output of this script, so relocating the project only requires changing cd.
* name() selects the combined graph explicitly rather than relying on it
* being the topmost graph window.
graph export "Output\step2\Figure2_hr_tvc_combine.pdf", name(hr_tvc_combine) as(pdf) replace

* Export the source data behind the figure: analysis time plus the predicted
* hazard ratios and their confidence limits, one row per observation.
keep _t hr2 hr2_lci hr2_uci hr3 hr3_lci hr3_uci
export excel Output\step2\Figure2_source_data.xlsx, firstrow(var) keepcellfmt replace