# Load the analysis dataset that the rest of this file reads from.
wvs_final = pd.read_csv('final_data/wvs_final.csv')


# Preview the first rows, skipping the first 27 columns by position.
wvs_final.iloc[:, 27:].head()


# Load the incarceration table; index_col=0 uses the CSV's first column as the row index.
incarceration_by_country = pd.read_csv('final_data/incarceration.csv', index_col=0)


# Preview the first rows of the incarceration table.
incarceration_by_country.head()


# NOTE(review): `fig` is not assigned anywhere above this line in the visible code —
# presumably it was created by a notebook cell that is not shown; confirm before
# running this file top-to-bottom.
fig.show()


# Histogram of the 'prisonPopRate' column on a dedicated 8x5 figure.
fig, ax = plt.subplots(figsize=(8, 5))
sb.histplot(
    wvs_final['prisonPopRate'],
    bins=np.arange(0, 700, 50),  # bin edges 0, 50, ..., 650
    ax=ax,  # the axes just created (also pyplot's current axes at this point)
)
ax.set_xlabel("Incarceration Rate", fontsize=10)
ax.set_ylabel("Number of Countries", fontsize=10)
ax.tick_params(labelsize=13)
# Kept as the last statement of the cell so the notebook output is unchanged.
ax.set_title("Distribution of Incarceration Rates", fontsize=20)


fig.show()


# Histogram of the 'prisonPopRate' column on a dedicated 8x5 figure.
# NOTE(review): this cell repeats the incarceration-rate histogram that already
# appears earlier in the file — confirm whether both copies are needed.
fig, ax = plt.subplots(figsize=(8, 5))
sb.histplot(
    wvs_final['prisonPopRate'],
    bins=np.arange(0, 700, 50),  # bin edges 0, 50, ..., 650
    ax=ax,  # the axes just created (also pyplot's current axes at this point)
)
ax.set_xlabel("Incarceration Rate", fontsize=10)
ax.set_ylabel("Number of Countries", fontsize=10)
ax.tick_params(labelsize=13)
# Kept as the last statement of the cell so the notebook output is unchanged.
ax.set_title("Distribution of Incarceration Rates", fontsize=20)

Text(0.5, 1.0, 'Distribution of Incarceration Rates')


# Histogram of the 'Q69' column (labelled below as confidence in police),
# split into 10 equal-width bins.
fig, ax = plt.subplots(figsize=(8, 5))
sb.histplot(wvs_final['Q69'], bins=10, ax=ax)
ax.set_xlabel("Confidence in Police (Avg. Rating)", fontsize=10)
ax.set_ylabel("Number of Countries", fontsize=10)
ax.tick_params(labelsize=13)
# Kept as the last statement of the cell so the notebook output is unchanged.
ax.set_title("Distribution of Confidence in Police Ratings", fontsize=20)

Text(0.5, 1.0, 'Distribution of Confidence in Police Ratings')


# Show whatever figure `fig` currently refers to.
fig.show()


# Histogram of the 'Q70' column (labelled below as confidence in the justice
# system), split into 10 equal-width bins.
fig, ax = plt.subplots(figsize=(8, 5))
sb.histplot(wvs_final['Q70'], bins=10, ax=ax)
ax.set_xlabel("Confidence in Justice System (Avg. Rating)", fontsize=10)
ax.set_ylabel("Number of Countries", fontsize=10)
ax.tick_params(labelsize=13)
# Kept as the last statement of the cell so the notebook output is unchanged.
ax.set_title("Distribution of Confidence in Justice System", fontsize=20)

Text(0.5, 1.0, 'Distribution of Confidence in Justice System')


# NOTE(review): two consecutive show calls with no assignment to `fig` between
# them in the visible code — presumably each belonged to a separate notebook cell
# whose figure-building code is not shown; confirm.
fig.show()


fig.show()


# Pairwise correlations between the survey-question columns.
# The columns are selected by label ('Q19' through 'Q195', both inclusive)
# instead of the positional slice 26:68: according to the column index of
# wvs_final printed in this file, position 26 is 'pwght' (not a question column)
# and position 68 is 'Q195', so the positional slice was shifted by one.
graphing = wvs_final.loc[:, 'Q19':'Q195'].corr()

f, ax = plt.subplots(figsize=(11, 9))
# Draw on the axes created above explicitly instead of relying on pyplot's
# implicit "current axes".
ax = sb.heatmap(graphing, ax=ax, cbar_kws={'label': 'Correlation'})
ax.set_xlabel('Features', fontsize=20)
ax.set_ylabel('Features', fontsize=20)
# The last axes of the figure is taken to be the colorbar; enlarge its label.
ax.figure.axes[-1].yaxis.label.set_size(20)
# Trailing semicolon keeps the notebook from echoing the returned Text object.
ax.set_title('Correlation Heatmap', fontsize=20);


# NOTE(review): the correlation heatmap above binds its figure to `f`, not `fig`,
# so this call shows whatever `fig` was last assigned to — confirm which figure
# is intended here.
fig.show()


import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Pivot to one row per value of 'Continents', aggregating 'prisonPopRate'
# with pivot_table's default aggregation, then render it as a heatmap.
heatmap1_data = wvs_final.pivot_table(
    values='prisonPopRate',
    index=['Continents'],
)
sns.heatmap(heatmap1_data, cmap="YlGnBu")

<AxesSubplot:ylabel='Continents'>


# List every column label of wvs_final.
wvs_final.columns

Index(['Unnamed: 0', 'Alpha-3 code', 'Unnamed: 0.1', 'B_COUNTRY', 'C_COW_NUM',
       'A_YEAR', 'D_INTERVIEW', 'J_INTDATE', 'FW_END', 'FW_START',
       'K_DURATION', 'Q_MODE', 'N_REGION_ISO', 'N_REGION_WVS', 'N_TOWN',
       'G_TOWNSIZE', 'G_TOWNSIZE2', 'H_SETTLEMENT', 'H_URBRURAL', 'I_PSU',
       'O1_LONGITUDE', 'O2_LATITUDE', 'F_INTPRIVACY', 'E1_LITERACY',
       'W_WEIGHT', 'S018', 'pwght', 'Q19', 'Q21', 'Q23', 'Q29', 'Q33_3',
       'Q34_3', 'Q35_3', 'Q40', 'Q45', 'Q52', 'Q57', 'Q59', 'Q63', 'Q69',
       'Q70', 'Q71', 'Q122', 'Q124', 'Q126', 'Q129', 'Q130', 'Q131', 'Q132',
       'Q133', 'Q134', 'Q136', 'Q137', 'Q140', 'Q141', 'Q144', 'Q145', 'Q150',
       'Q156', 'Q157', 'Q178', 'Q179', 'Q180', 'Q181', 'Q191', 'Q192', 'Q194',
       'Q195', 'Country', 'prisonPopRate', 'pop2021', 'Continents'],
      dtype='object')


# Selecting Features of Interest
# Keep the listed question columns plus 'prisonPopRate', which is deliberately
# listed last because later code slices the predictors with columns[:-1].
model_data = wvs_final[[
    'Q19', 'Q21', 'Q23', 'Q29', 'Q33_3', 'Q34_3', 'Q35_3', 'Q40',
    'Q45', 'Q52', 'Q57', 'Q59', 'Q63', 'Q69', 'Q70', 'Q71', 'Q122', 'Q124',
    'Q126', 'Q129', 'Q130', 'Q131', 'Q132', 'Q133', 'Q134', 'Q136', 'Q137',
    'Q140', 'Q141', 'Q144', 'Q145', 'Q150', 'Q156', 'Q157', 'Q178', 'Q179',
    'Q180', 'Q181', 'Q191', 'Q192', 'Q194', 'Q195', 'prisonPopRate',
]]
# Discard every column that contains at least one missing value.
model_data = model_data.dropna(axis='columns', how='any')


from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Ridge, Lasso, LinearRegression
from sklearn.model_selection import KFold
from sklearn.metrics import r2_score
# Performing a 70-30 train test split
# Rows are split once with a fixed seed; predictors and target are separated
# afterwards so that each X/y pair comes from the same rows.
trainingData, testingData = train_test_split(
    model_data,
    train_size=0.7,
    random_state=10,
)
X_train, y_train = (
    trainingData.drop(columns=['prisonPopRate']),
    trainingData['prisonPopRate'],
)
X_test, y_test = (
    testingData.drop(columns=['prisonPopRate']),
    testingData['prisonPopRate'],
)



# The RMSE will be our metric to evaluate the accuracy of all models
def rmse_score(model, X, y):
    """Return the root-mean-squared error of ``model``'s predictions on ``X``.

    Args:
        model: Object exposing ``predict(X)``.
        X: Input passed unchanged to ``model.predict``.
        y: Observed target values to compare the predictions against.

    Returns:
        Square root of the mean squared difference between ``y`` and the
        predictions.
    """
    residuals = y - model.predict(X)
    mean_squared_error = np.mean(residuals ** 2)
    return np.sqrt(mean_squared_error)

def per_error(model, X, y):
    """Return the mean absolute prediction error relative to ``y``.

    The result is a fraction (it is not multiplied by 100). Each error is
    divided by the corresponding entry of ``y``, so a zero in ``y`` leads to a
    division by zero for that entry.

    Args:
        model: Object exposing ``predict(X)``.
        X: Input passed unchanged to ``model.predict``.
        y: Observed target values to compare the predictions against.

    Returns:
        Mean of ``|y - prediction| / y`` taken in absolute value.
    """
    predictions = model.predict(X)
    relative_errors = np.abs(y - predictions) / y
    # Second abs keeps the ratio non-negative when entries of y are negative.
    return np.mean(np.abs(relative_errors))


# create and fit the model
# fit() returns the estimator itself, so construction and fitting are chained.
lasso_reg = Lasso().fit(X_train, y_train)
lasso_pred = lasso_reg.predict(X_train)

# Print Errors
# Note: the "Percent Error" value is the fraction returned by per_error.
print('Lasso Training RMSE: ', rmse_score(lasso_reg, X_train, y_train), sep='')
print('Lasso Training Percent Error: ', per_error(lasso_reg, X_train, y_train), sep='')

# plot the predictions
plt.style.use('default')
plt.scatter(y_train, lasso_pred)
plt.xlabel('Actual Values')
plt.ylabel('Predicted Values')
plt.title('LASSO Model')

# Overlay the degree-1 least-squares line of predicted vs. actual values in red.
m, b = np.polyfit(y_train, lasso_pred, deg=1)
plt.plot(y_train, m * y_train + b, "r")

plt.show()

Lasso Training RMSE: 86.77870321632574
Lasso Training Percent Error: 0.40652496273631317


import plotly.graph_objects as go

# NOTE(review): `lasso_model`, `cv_values`, `train_values`, `alphas` and
# `best_alpha` are not defined in the visible code — presumably they come from a
# cross-validation cell that is not shown; confirm.
# Report the training errors, the smallest value in cv_values, the training R^2
# and the selected alpha.
print('Lasso Training RMSE: ' + str(rmse_score(lasso_model, X_train, y_train)))
print('Lasso Training Percent Error: ' + str(per_error(lasso_model, X_train, y_train)))
print('Lasso Validation RMSE: ' + str(min(cv_values)))
print("R^2 score: ",r2_score(y_train, lasso_model.predict(X_train)))
print('Optimal Alpha Value: ' + str(best_alpha))
# Plot the train and CV series against the candidate alphas as two traces.
fig = go.Figure()
fig.add_trace(go.Scatter(x = alphas, y = train_values, mode="lines+markers", name="Train"))
fig.add_trace(go.Scatter(x = alphas, y = cv_values, mode="lines+markers", name="CV"))
# NOTE(review): the y-axis title reads "CV RMSE" although the Train trace is
# plotted against the same axis — confirm the intended label.
fig.update_layout(xaxis_title=r"$\alpha$", yaxis_title="CV RMSE", 
                  title_text='Training and CV Errors for Lasso Model', title_x=0.5)
# NOTE(review): called with no arguments; presumably kept so that the figure is
# the cell's last expression and gets rendered by the notebook — confirm.
fig.update_layout()

Lasso Training RMSE: 86.77870321632574
Lasso Training Percent Error: 0.40652496273631317
Lasso Validation RMSE: 184.84142440478826
R^2 score:  0.5311391049733521
Optimal Alpha Value: 1.0


# plot the predictions
# Predict once and reuse the result for both the scatter and the trend-line fit
# instead of running the model on X_train twice.
lasso_train_pred = lasso_model.predict(X_train)

plt.scatter(y_train, lasso_train_pred)
plt.title('LASSO Model')
plt.xlabel('actual values')
plt.ylabel('predicted values')

# Degree-1 least-squares fit of predicted on actual values, drawn in red.
m, b = np.polyfit(y_train, lasso_train_pred, 1)

plt.plot(y_train, m*y_train + b, "r")

plt.show()


# One row per predictor, paired with the corresponding entry of lasso_model.coef_.
# Every column of model_data except the last one is treated as a predictor.
model_coefficients = pd.DataFrame({'Predictor': model_data.columns[:-1]})
model_coefficients['LASSO Coefficient'] = lasso_model.coef_
model_coefficients


# Same table ordered by coefficient, ascending.
model_coefficients.sort_values(by='LASSO Coefficient')


# Correlation of every predictor column with 'prisonPopRate'.
# np.corrcoef returns a 2x2 correlation matrix for two series; entry [0, 1] is
# the predictor/target correlation. Built with a comprehension instead of a
# manual append loop; the iteration order matches the 'Predictor' column, which
# is built from the same model_data.columns[:-1].
corrs = [
    np.corrcoef(model_data[c], model_data['prisonPopRate'])[0, 1]
    for c in model_data.columns[:-1]
]

model_coefficients['Correlation'] = corrs
model_coefficients


# Same table ordered by correlation, ascending.
model_coefficients.sort_values('Correlation')


# NOTE(review): none of the show calls in this section is preceded by visible
# code that builds a new figure — presumably each belonged to a notebook cell
# whose plotting code is not shown; confirm.
fig.show()


fig.show()


fig.show()


# Rows of wvs_final whose 'Country' value is exactly "United States".
wvs_final[wvs_final["Country"] == "United States"]


fig.show()


# Rows of wvs_final whose 'Country' value is exactly "Thailand".
wvs_final[wvs_final["Country"] == "Thailand"]


fig.show()


# Rows of wvs_final whose 'Country' value is exactly "China".
wvs_final[wvs_final["Country"] == "China"]


fig.show()


# Rows of wvs_final whose 'Country' value is exactly "Russia".
wvs_final[wvs_final["Country"] == "Russia"]


fig.show()


# Rows of wvs_final whose 'Country' value is exactly "Mexico".
wvs_final[wvs_final["Country"] == "Mexico"]

	Q19	Q21	Q23	Q29	Q33_3	Q34_3	Q35_3	Q40	Q45	Q52	...	Q180	Q181	Q191	Q192	Q194	Q195	Country	prisonPopRate	pop2021	Continents
0	1.961155	1.973108	1.961155	3.332997	2.089910	1.913828	2.037924	2.737738	1.609610	3.901198	...	1.676324	1.305694	1.296407	1.124000	1.337662	3.396603	Andorra	55	77.355	Europe
1	1.979063	1.958126	1.981057	3.137643	2.033401	1.562948	2.091966	2.620795	1.414195	2.883117	...	2.141117	1.875758	1.725806	1.606925	1.858617	3.383576	Argentina	230	45605.826	South America
2	1.966354	1.923331	1.971318	3.351230	2.090505	1.702899	2.153718	2.475455	1.531603	3.467482	...	1.848688	1.435025	1.595638	1.321968	1.711161	4.453169	Australia	160	25788.215	Australia
3	1.676819	1.592433	1.657939	2.213542	1.309383	1.183811	1.604219	1.470489	1.253785	3.618410	...	1.700833	1.607500	1.794167	1.795000	1.750833	2.752500	Bangladesh	52	166303.498	Asia
4	1.947222	1.897778	1.927222	2.862550	1.827737	1.186183	1.612506	1.954523	1.131707	3.159942	...	2.037745	2.045768	2.003425	1.923275	2.058736	3.649202	Bolivia	164	11832.940	South America

	Country	prisonPopRate	pop2021	Alpha-3 code
0	United States	639	332915.073	USA
1	El Salvador	566	6518.499	SLV
2	Turkmenistan	552	6117.924	TKM
3	Thailand	549	69950.850	THA
4	Palau	522	18.169	PLW

	Predictor	LASSO Coefficient	Correlation
0	Q19	-107.654831	0.071283
1	Q21	-0.000000	0.004033
2	Q23	-24.170106	0.106238
3	Q29	-53.413527	0.249873
4	Q33_3	0.000000	0.323862
5	Q34_3	3.067808	0.222622
6	Q35_3	0.000000	0.233588
7	Q40	-0.000000	0.251305
8	Q45	7.414830	-0.039580
9	Q52	-193.149738	-0.265061
10	Q57	-0.000000	-0.022344
11	Q59	28.207305	0.123451
12	Q63	-19.517960	-0.192532
13	Q122	19.471289	0.104536
14	Q124	-200.902152	0.069951
15	Q126	10.728769	0.118458
16	Q129	232.271589	0.199845
17	Q131	-0.718311	0.253023
18	Q144	0.000000	-0.192540
19	Q145	0.000000	-0.238960
20	Q150	-393.697317	-0.220355
21	Q156	4.450741	-0.038055
22	Q157	0.000000	0.142761
23	Q179	-0.000000	0.097236
24	Q180	-13.618422	0.137865
25	Q181	-101.522172	0.054166
26	Q191	102.523592	0.165764
27	Q195	6.838861	0.184667

	Predictor	LASSO Coefficient	Correlation
9	Q52	-193.149738	-0.265061
19	Q145	0.000000	-0.238960
20	Q150	-393.697317	-0.220355
18	Q144	0.000000	-0.192540
12	Q63	-19.517960	-0.192532
8	Q45	7.414830	-0.039580
21	Q156	4.450741	-0.038055
10	Q57	-0.000000	-0.022344
1	Q21	-0.000000	0.004033
25	Q181	-101.522172	0.054166
14	Q124	-200.902152	0.069951
0	Q19	-107.654831	0.071283
23	Q179	-0.000000	0.097236
13	Q122	19.471289	0.104536
2	Q23	-24.170106	0.106238
15	Q126	10.728769	0.118458
11	Q59	28.207305	0.123451
24	Q180	-13.618422	0.137865
22	Q157	0.000000	0.142761
26	Q191	102.523592	0.165764
27	Q195	6.838861	0.184667
16	Q129	232.271589	0.199845
5	Q34_3	3.067808	0.222622
6	Q35_3	0.000000	0.233588
3	Q29	-53.413527	0.249873
7	Q40	-0.000000	0.251305
17	Q131	-0.718311	0.253023
4	Q33_3	0.000000	0.323862

Introduction¶

Why does this matter?¶

Data

World Values Survey (WVS)

Incarceration Rates by Country

Visualization¶

Incarceration by Country¶

Confidence: The Police¶

Confidence: Justice System/Courts¶

Correlation: Confidence in Police / Criminal Justice¶

Modeling

Cross Validation¶

Qualitative Analysis of LASSO Model¶

Analysis and Case Studies

United States

Thailand

China

Russia

Mexico

Conclusion¶

Potential Gaps¶

Legal, Policy, and Ethical Implications¶

Analysis of Incarceration Rates Across Countries and Social Attitudes¶

Introduction¶

Why does this matter?¶

Data

World Values Survey (WVS)

Incarceration Rates by Country

Visualization¶

Incarceration by Country¶

Confidence: The Police¶

Confidence: Justice System/Courts¶

Correlation: Confidence in Police / Criminal Justice¶

Modeling

Cross Validation¶

Qualitative Analysis of LASSO Model¶

Analysis and Case Studies

United States

Thailand

China

Russia

Mexico

Conclusion¶

Potential Gaps¶

Legal, Policy, and Ethical Implications¶