# Train-test split
#
# Per the original note for this cell, `df` still carries the header row as
# its first data entry (position 0). That leaves the numeric columns typed as
# strings and puts a stray string label into 'Respon_Promo', so the frame is
# cleaned here before it is split.

# Slice first, then copy: the result is an independent frame, so the column
# assignments below neither touch the global `df` nor trigger pandas'
# SettingWithCopyWarning (which slicing *after* the copy could do).
df_cleaned = df.iloc[1:].copy()

# Convert the model columns to numeric types; values that cannot be parsed
# become NaN instead of raising.
numeric_cols = ['Usia', 'Pendapatan', 'FrekuensiBelanja', 'TotalBelanja', 'Respon_Promo']
df_cleaned[numeric_cols] = df_cleaned[numeric_cols].apply(pd.to_numeric, errors='coerce')

# Drop every row that contains a NaN in any column (this includes NaNs
# produced by the coercion above). Reassignment is used instead of
# inplace=True; the resulting frame is the same.
df_cleaned = df_cleaned.dropna()

# Re-encode 'TrenPenjualan' on the cleaned rows. Per the original note, the
# encoding done in an earlier cell was fitted on data that still included the
# header row, so it is refitted here.
encoder = LabelEncoder()
df_cleaned['TrenPenjualan_encoded'] = encoder.fit_transform(df_cleaned['TrenPenjualan'])

# Features (X) and target (y) taken from the cleaned dataframe.
X = df_cleaned[['Usia', 'Pendapatan', 'FrekuensiBelanja', 'TotalBelanja', 'TrenPenjualan_encoded']]
y = df_cleaned['Respon_Promo']

# 80/20 split with a fixed seed for reproducibility; stratify=y keeps the
# class proportions of the target the same in both partitions.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)