Sorry this format isn’t very fancy
————————-
import pyreadstat
import pandas as pd
import numpy as np
def gettingData():
pathToSav = r”C:\Users\brigh\OneDrive\work\notWork\producingStatistics\religiousSwitching\Pew-Research-Center-2014-U.S.-Religious-Landscape-Study\Dataset – Pew Research Center 2014 Religious Landscape Study National Telephone Survey – Version 1.1 – December 1 2016.sav”
old_df, meta = pyreadstat.read_sav(pathToSav)
codebook = {
10000.0:”Catholic”,
1100.0:”Evangelical Protestant Tradition”,
1200.0:”Mainline Protestant Tradition”,
100000.0:”Unaffiliated”,
1300.0:”Historically Black Protestant Tradition”,
50000.0:”Jewish”,
20000.0:”Mormon”,
60000.0:”Muslim”,
30000.0:”Orthodox Christian”,
40001.0:”Jehovah’s Witness”,
80000.0:”Hindu”,
900000.0:”Don’t know/refused”,
40002.0:”Other Christian”,
70000.0:”Buddhist”,
90002.0:”Other Faiths”,
90001.0:”Other World Religions”
}
christianCodebook = {
10000.0:”Catholic”,
1100.0:”Evangelical Protestant Tradition”,
1200.0:”Mainline Protestant Tradition”,
1300.0:”Historically Black Protestant Tradition”,
20000.0:”Mormon”,
30000.0:”Orthodox Christian”,
40001.0:”Jehovah’s Witness”,
40002.0:”Other Christian”
}
old_df[“CHRELTRAD”] = old_df[“CHRELTRAD”].replace({90001.0:90002.0})
old_df[“RELTRAD”] = old_df[“RELTRAD”].replace({90001.0:90002.0})
return old_df, christianCodebook, codebook
old_df, christianCodebook, codebook = gettingData()
######
def unweightedAndMargins(old_df, codebook):
# 1. Choose columns
child_col = “CHRELTRAD” # childhood religion
current_col = “RELTRAD” # current religion
# 2. Drop missing values
df = old_df.copy()[[child_col, current_col]].dropna(subset=[child_col, current_col])
# 3. Map numeric codes to readable labels
df[child_col] = df[child_col].map(codebook)
df[current_col] = df[current_col].map(codebook)
# 4. Create unweighted cross-tab with totals
cross_tab = pd.crosstab(
df[child_col],
df[current_col],
margins=True,
margins_name=’Total’
)
# 5. Row proportions (childhood → current)
#cross_tab_prop = cross_tab.div(cross_tab.sum(axis=1), axis=0)
cross_tab_prop = cross_tab.div(cross_tab[“Total”], axis=0)
# 6. Proportion of total population (childhood religion)
total_count = cross_tab.loc[‘Total’, ‘Total’]
cross_tab_prop[‘Total’] = cross_tab[‘Total’] / total_count
# 7. Proportion of total population (current religion)
cross_tab_prop.loc[‘Total’, :] = cross_tab.loc[‘Total’, :] / total_count
# 8. Save to CSV if desired
cross_tab_prop.to_csv(“religion_switching_prop_unweighted_margin.csv”)
#########################
# Compute Markov equilibrium
#########################
# Remove Total row/column to make transition matrix
transition_matrix = cross_tab_prop.drop(index=’Total’, columns=’Total’)
# Convert to NumPy array
P = transition_matrix.to_numpy()
# Compute stationary distribution: π such that π P = π
eigvals, eigvecs = np.linalg.eig(P.T)
index = np.argmin(np.abs(eigvals – 1))
stationary = np.real(eigvecs[:, index])
stationary /= stationary.sum() # normalize to sum to 1
# Convert to DataFrame for readability
stationary_df = pd.DataFrame({
‘Religion’: transition_matrix.index,
‘Equilibrium Proportion’: stationary
}).sort_values(‘Equilibrium Proportion’, ascending=False)
print(stationary_df)
# Religion Equilibrium Proportion
# 3 Evangelical Protestant Tradition 0.291407
# 15 Unaffiliated 0.279449
# 8 Mainline Protestant Tradition 0.161198
# 1 Catholic 0.075496
# 5 Historically Black Protestant Tradition 0.054857
# 13 Other Faiths 0.026388
# 7 Jewish 0.025019
# 9 Mormon 0.024101
# 0 Buddhist 0.012054
# 2 Don’t know/refused 0.010115
# 14 Other World Religions 0.008207
# 6 Jehovah’s Witness 0.008102
# 10 Muslim 0.007859
# 4 Hindu 0.006371
# 11 Orthodox Christian 0.004777
# 12 Other Christian 0.004600
unweightedAndMargins(old_df, codebook)
def christianSwitchingStatsUnWeighted(old_df, codebook, christianCodebook):
child_col = “CHRELTRAD”
current_col = “RELTRAD”
# Drop missing values
df = old_df.copy()[[child_col, current_col]].dropna()
# Map codes
df[child_col] = df[child_col].map(codebook)
df[current_col] = df[current_col].map(codebook)
# Build unweighted cross-tab (raw counts)
cross_tab = pd.crosstab(
index=df[child_col],
columns=df[current_col],
margins=True,
margins_name=”Total”
)
# Row proportions (childhood → current)
#cross_tab_pct = cross_tab.div(cross_tab.sum(axis=1), axis=0)
cross_tab_pct = cross_tab.div( cross_tab[“Total”] , axis=0)
# Proportion of population (childhood)
total_count = cross_tab.loc[“Total”, “Total”]
cross_tab_pct[“Total”] = cross_tab[“Total”] / total_count
# Proportion of population (current)
cross_tab_pct.loc[“Total”, :] = cross_tab.loc[“Total”, :] / total_count
# Transition matrix for equilibrium (drop totals)
transition_matrix = cross_tab_pct.drop(index=”Total”, columns=”Total”)
P = transition_matrix.to_numpy()
# Markov stationary distribution
eigvals, eigvecs = np.linalg.eig(P.T)
idx = np.argmin(np.abs(eigvals – 1))
stationary = np.real(eigvecs[:, idx])
stationary /= stationary.sum()
stationary_df = pd.Series(stationary, index=transition_matrix.index)
# making flipped version
switched_cross_tab = pd.crosstab(
index=df[current_col],
columns=df[child_col],
margins=True,
margins_name=”Total”
)
switched_cross_tab_pct = switched_cross_tab.div( switched_cross_tab[“Total”] , axis=0)
switched_total_count = switched_cross_tab.loc[“Total”, “Total”]
switched_cross_tab_pct[“Total”] = switched_cross_tab[“Total”] / switched_total_count
switched_cross_tab_pct.loc[“Total”, :] = switched_cross_tab.loc[“Total”, :] / switched_total_count
switched_transition_matrix = switched_cross_tab_pct.drop(index=”Total”, columns=”Total”)
##############################
# Christian vs Non-Christian
##############################
christian_set = set(christianCodebook.values())
stats = []
for religion in transition_matrix.index:
row = switched_transition_matrix.loc[religion]
# From other Christian vs non-Christian → converts IN
total_in = row.sum()
from_christian = row[row.index.isin(christian_set)].sum() – (row[religion] if religion in christian_set else 0)
from_non = total_in – from_christian – row[religion]
# Converts OUT
row = transition_matrix.loc[religion]
total_out = row.sum()
to_christian = row[row.index.isin(christian_set)].sum() – (row[religion] if religion in christian_set else 0)
to_non = total_out – to_christian – row[religion]
# Current share of population
present_share = cross_tab_pct.loc[“Total”, religion]
# Born with this faith
born_share = cross_tab.loc[religion, “Total”] / total_count
# Equilibrium share
eq_share = stationary_df[religion]
stats.append({
“Religion”: religion,
“Converts from Christian”: from_christian,
“Converts from non-Christian”: from_non,
“Leaves to Christian”: to_christian,
“Leaves to non-Christian”: to_non,
“Present population share”: present_share,
“Born with this faith”: born_share,
“Equilibrium share”: eq_share
})
result_df = pd.DataFrame(stats)#.sort_values(“Present population share”, ascending=False)
# Forecasted growth relative to present
result_df[“forecastedGrowth”] = (result_df[“Equilibrium share”] / result_df[“Present population share”]) – 1
result_df.to_csv(“unweighted_by_religion.csv”)
print(result_df)
return result_df
christianSwitchingStatsUnWeighted(old_df, codebook, christianCodebook)
# checkingConvertNumbers
def print_convert_shares(old_df, codebook):
child_col = “CHRELTRAD”
current_col = “RELTRAD”
df = old_df[[child_col, current_col]].dropna()
df[child_col] = df[child_col].map(codebook)
df[current_col] = df[current_col].map(codebook)
religionsz = list(codebook.values())
religionsz.sort()
for religion in religionsz:
print(religion, np.average(np.logical_and(df[current_col] == religion, df[child_col] != religion)) / np.average(df[current_col] == religion))
switched_cross_tab = pd.crosstab(
index=df[current_col],
columns=df[child_col],
margins=False
)
# Proportion of current members by childhood religion
switched_cross_tab_pct = switched_cross_tab.div(switched_cross_tab.sum(axis=1), axis=0)
# Extract diagonal (born into that religion)
born_share = np.diag(switched_cross_tab_pct.to_numpy())
# Convert share = 1 – born_share
convert_share = 1 – born_share
convert_df = pd.DataFrame({
“Religion”: switched_cross_tab_pct.index,
“Convert share”: convert_share
}).reset_index(drop=True)
print(convert_df)
return convert_df
convert_df = print_convert_shares(old_df, codebook)
Leave a Reply