首页下载资源后端机器学习-保险风险预测(基于神经网络和SVM实现,包含完整的代码和数据,数据来源于保诚公司)

ZIP机器学习-保险风险预测(基于神经网络和SVM实现,包含完整的代码和数据,数据来源于保诚公司)

qq47522525333.25MB需要积分:1

资源文件列表:

保险风险预测 大约有9个文件
  1. 保险风险预测/
  2. 保险风险预测/代码/
  3. 保险风险预测/代码/SVM-RiskAssessment.py 6.82KB
  4. 保险风险预测/代码/nn-RiskAssessment.py 3.46KB
  5. 保险风险预测/代码/preprocess.py 7.51KB
  6. 保险风险预测/数据/
  7. 保险风险预测/数据/application_test.csv 25.34MB
  8. 保险风险预测/数据/predict.csv 6.32MB
  9. 保险风险预测/数据/train.csv 141.81MB

资源介绍:

机器学习--保险风险预测(基于神经网络和SVM实现,包含完整的代码和数据,数据来源于保诚公司)
#!/usr/bin/env python
# _*_coding:utf-8_*_
"""Preprocess the Prudential Life Insurance Assessment training set.

Pipeline:
  1. Load train.csv.
  2. Drop columns with a very high missing-value fraction.
  3. Mean-impute moderately missing numeric columns; drop the few rows
     where Employment_Info_1 is null.
  4. Add per-Id count features over the insured/insurance-history columns.
  5. Ordinal-encode the alphanumeric Product_Info_2 category.
  6. Draw a 100-row random sample for exploratory plotting.
"""
import pandas as pd
import warnings
warnings.filterwarnings("ignore")
from pandas.plotting import andrews_curves
from pandas.plotting import radviz
from pandas.plotting import parallel_coordinates
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

sns.set(style="white", color_codes=True)

# NOTE(review): hard-coded absolute Windows path — parameterize before
# running on another machine.
train_data = pd.read_csv('D:\\DataSet\\Prudential_Life_Insurance_Assessment'
                         '\\train\\train.csv')

# Columns dropped outright because of their high missing-value fraction
# (one vectorized drop instead of nine sequential single-column drops).
HIGH_MISSING_COLS = [
    'Medical_History_10', 'Medical_History_32', 'Medical_History_24',
    'Medical_History_15', 'Family_Hist_5', 'Family_Hist_3',
    'Family_Hist_2', 'Insurance_History_5', 'Family_Hist_4',
]
train_data = train_data.drop(HIGH_MISSING_COLS, axis=1)

# Moderately missing numeric columns: impute with the column mean.
for col in ('Employment_Info_6', 'Medical_History_1', 'Employment_Info_4'):
    train_data[col] = train_data[col].fillna(train_data[col].mean())

# Employment_Info_1 has only a handful of gaps: drop those rows instead
# of imputing.
train_data = train_data.drop(
    train_data[train_data['Employment_Info_1'].isnull()].index)

# Per-Id count features over the categorical insured-info /
# insurance-history columns, merged back onto the frame as
# Info1_count .. Info13_count (same names and order as before).
#
# Bug fix: the original used the dict-renaming form
# ``.agg({'Info1_count': 'count'})``, which raises SpecificationError on
# pandas >= 1.0; named aggregation is the supported equivalent.  A bare
# ``train_data.groupby([...])`` whose result was discarded (dead code)
# has been removed.
COUNT_SOURCE_COLS = [
    'InsuredInfo_1', 'InsuredInfo_2', 'InsuredInfo_3', 'InsuredInfo_4',
    'InsuredInfo_5', 'InsuredInfo_6', 'Insurance_History_1',
    'Insurance_History_2', 'Insurance_History_3', 'Insurance_History_4',
    'Insurance_History_7', 'Insurance_History_8', 'Insurance_History_9',
]
for i, col in enumerate(COUNT_SOURCE_COLS, start=1):
    counts = train_data.groupby('Id', as_index=False)[col].agg(
        **{'Info%d_count' % i: 'count'})
    train_data = pd.merge(train_data, counts, on=['Id'], how='left')

print(list(set(train_data['Product_Info_2'])))

# Ordinal-encode the alphanumeric Product_Info_2 categories.
Product_Info_2_map = {
    'A1': 1, 'A2': 2, 'A3': 3, 'A4': 4, 'A5': 5, 'A6': 6, 'A7': 7,
    'A8': 8, 'B1': 9, 'B2': 10, 'C1': 11, 'C2': 12, 'C3': 13, 'C4': 14,
    'D1': 15, 'D2': 16, 'D3': 17, 'D4': 18, 'E1': 19,
}
train_data['Product_Info_2'] = train_data['Product_Info_2']\
    .map(Product_Info_2_map)

# Small random sample used for the exploratory plots.
train_data_Sample = train_data.sample(n=100)

# The original file ended with a long block of commented-out exploratory
# plots (pairplot, FacetGrid kdeplots/scatters, andrews_curves,
# parallel_coordinates, radviz, and boxplot/stripplot pairs for Ins_Age,
# Ht, Wt and BMI against Response).  Removed here as dead code; recreate
# any of them from train_data_Sample when needed, e.g.:
#   sns.boxplot(x='Response', y='BMI', data=train_data_Sample); plt.show()
100+评论
captcha