Evaluation_code_for_participant
Evaluation_code_for_participant
import sys
Please ensure that the predicted_score column does not contain any null values and
that the column names exactly match those listed above.
Please ensure that all these files are stored as comma-separated (',') CSV files.
python Evaluation_Code.py
C/Users/anujahardaha/Documents/final_predictions_with_temp1.csv
'''
def checkDataType1(df):
    """Validate the columns of the match-level predictions DataFrame.

    Every column listed below must contain no NaNs. The first five must
    additionally have exactly the dtype shown; the three ``train_hps_*``
    columns are only checked for NaNs.

    Args:
        df: pandas DataFrame holding the prediction columns.

    Returns:
        None. The function is called only for its validation effect.

    Raises:
        AssertionError: if a column contains NaNs or has the wrong dtype.
            Raised explicitly instead of via ``assert`` so that the checks
            are not stripped when Python runs with ``-O``.
        KeyError: if a required column is missing from ``df``.
    """
    # (column name, required dtype) -- None means "NaN check only".
    expected_columns = (
        ('match id', 'int64'),
        ('win_pred_team_id', 'int64'),
        ('win_pred_score', 'float64'),
        ('train_algorithm', 'object'),
        ('is_ensemble', 'object'),
        ('train_hps_trees', None),
        ('train_hps_depth', None),
        ('train_hps_lr', None),
    )
    for column, required_dtype in expected_columns:
        series = df[column]
        if series.isna().sum() != 0:
            raise AssertionError(f'{column} should not have NaNs')
        if required_dtype is not None and series.dtype != required_dtype:
            raise AssertionError(f'{column} is not {required_dtype} type')
    return None
def checkDataType2(df):
    """Validate the columns of the feature-level DataFrame.

    Each column listed below must contain no NaNs and must have exactly the
    dtype shown.

    Args:
        df: pandas DataFrame holding the feature columns.

    Returns:
        None. The function is called only for its validation effect.

    Raises:
        AssertionError: if a column contains NaNs or has the wrong dtype.
            Raised explicitly instead of via ``assert`` so that the checks
            are not stripped when Python runs with ``-O``. The dtype message
            names the dtype that is actually required (e.g. ``int64``).
        KeyError: if a required column is missing from ``df``.
    """
    # (column name, required dtype), checked in this order.
    expected_columns = (
        ('feat_id', 'int64'),
        ('feat_name', 'object'),
        ('feat_description', 'object'),
        ('model_feat_imp_train', 'float64'),
        ('feat_rank_train', 'int64'),
    )
    for column, required_dtype in expected_columns:
        series = df[column]
        if series.isna().sum() != 0:
            raise AssertionError(f'{column} should not have NaNs')
        if series.dtype != required_dtype:
            raise AssertionError(f'{column} is not {required_dtype} type')
    return None
def getAccuracy(df):
    """Return the percentage of rows whose predicted winner is correct.

    A row counts as correct when ``winner_id`` equals ``win_pred_team_id``.

    Args:
        df: pandas DataFrame with ``winner_id`` and ``win_pred_team_id``
            columns.

    Returns:
        The accuracy as a float percentage rounded to 4 decimal places, or
        0.0 when ``df`` has no rows (instead of dividing by zero).
    """
    total_rows = df.shape[0]
    if total_rows == 0:
        return 0.0
    # Count matches from the boolean mask; no filtered copy is needed.
    correct_rows = int((df['winner_id'] == df['win_pred_team_id']).sum())
    return round(correct_rows * 100 / total_rows, 4)
# The script expects exactly three CSV paths after the script name.
if len(sys.argv) != 4:
    sys.exit("Please pass three files only as mentioned in the Instructions.")

# First argument: the file validated by checkDataType1 below.
# NOTE(review): this read was absent while df_input1 was still used; it is
# reconstructed from the three-file argument check -- confirm the order.
input1_address = sys.argv[1]
df_input1 = pd.read_csv(input1_address, sep=",", header=0)

# Second argument: the file validated by checkDataType2 below.
input2_address = sys.argv[2]
df_input2 = pd.read_csv(input2_address, sep=",", header=0)

# For participants: location of the dependent-variable file. The header here
# would be match_id, dataset_type, win_team_id. Participants can generate it
# from the labeled train data. These files are comma separated.
round_eval = sys.argv[3]
df_round = pd.read_csv(round_eval, sep=",", header=0)

# Fail fast on malformed submissions before any scoring is attempted.
checkDataType1(df_input1)
checkDataType2(df_input2)
'''
shape_before_join = df_round.shape[0]