# PRO_1.py
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings('ignore')
# Announce the run, then read the dataset into `data`.
# "telco.csv" is a relative path, so it is resolved against the current
# working directory.
banner = "Loading and analyzing the IBM Telco Customer Churn dataset...\n"
print(banner)
data = pd.read_csv(filepath_or_buffer="telco.csv")
# 1. DATA OVERVIEW AND QUALITY CHECKS
# Prints the frame's shape, column names, first rows, dtypes, the columns
# that contain missing values, and summary statistics.
print("\n=== DATA OVERVIEW ===")

frame_shape = data.shape
print("\nDataset Shape:", frame_shape)

column_names = data.columns.tolist()
print("\nColumns:", column_names)

preview = data.head()
print("\nSample Data (first 5 rows):\n", preview)

column_types = data.dtypes
print("\nData Types:\n", column_types)

# Count nulls per column once, then keep only the columns that have any.
null_counts = data.isnull().sum()
columns_with_nulls = null_counts[null_counts > 0]
print("\nMissing Values:\n", columns_with_nulls)

summary_stats = data.describe()
print("\nBasic Statistics for Numerical Columns:\n", summary_stats)
# Basic Analysis
# Headline figures: row count, share of rows whose 'Churn Label' is 'Yes',
# mean of the 'Age' column and mean of the 'CLTV' column.
print("\n=== BASIC ANALYSIS ===")

customer_total = len(data)
churn_rate = data['Churn Label'].eq('Yes').mean()
mean_age = data['Age'].mean()
mean_cltv = data['CLTV'].mean()

print(f"\n1. Total number of customers: {customer_total}")
print(f"2. Churn Rate: {churn_rate:.2%}")
print(f"3. Average Customer Age: {mean_age:.1f} years")
print(f"4. Average CLTV: ${mean_cltv:.2f}")
# Customer Demographics
# Prints the share of each 'Gender' value and of each age bracket, and stores
# every row's bracket in a new 'Age_Group' column on `data`.
print("\n=== CUSTOMER DEMOGRAPHICS ===")
print("\nGender Distribution:")
gender_share = data['Gender'].value_counts(normalize=True)
print(gender_share.apply(lambda x: f"{x:.2%}"))
print("\nAge Groups:")
# The top edge is open-ended so the "Elderly (60+)" bracket has no upper
# limit; a finite edge of 100 would silently turn older ages into NaN and
# drop them from the normalized counts below.
age_bins = [0, 30, 45, 60, np.inf]
age_labels = ['Young Adult (0-30)', 'Middle Age (31-45)', 'Senior (46-60)', 'Elderly (60+)']
# Bins are right-closed, i.e. (0, 30], (30, 45], (45, 60], (60, inf).
# include_lowest=True additionally places an age of exactly 0 in the first
# bracket, matching its "(0-30)" label.
data['Age_Group'] = pd.cut(
    data['Age'],
    bins=age_bins,
    labels=age_labels,
    include_lowest=True,
)
age_group_share = data['Age_Group'].value_counts(normalize=True)
print(age_group_share.apply(lambda x: f"{x:.2%}"))
# Churn Analysis
# Prints the churn split within each gender (each row of the table sums to
# 100%) and the five most frequent values of 'Churn Reason' among rows whose
# 'Churn Label' is 'Yes'.
print("\n=== CHURN ANALYSIS ===")
print("\nChurn by Gender:")
churn_by_gender = pd.crosstab(data['Gender'], data['Churn Label'], normalize='index')
print(churn_by_gender.apply(lambda x: x.apply(lambda y: f"{y:.2%}")))
print("\nTop 5 Churn Reasons:")
# Filter the churned rows once; both the reason ranking and the percentage
# denominator are derived from this subset, so neither is recomputed inside
# the loop.
churned_customers = data[data['Churn Label'] == 'Yes']
churned_total = len(churned_customers)
churn_reasons = churned_customers['Churn Reason'].value_counts().head()
for reason, count in churn_reasons.items():
    # The loop body only runs when at least one churned row exists, so
    # churned_total is never zero here.
    print(f"- {reason}: {count} customers ({count/churned_total:.2%} of churned customers)")
# Customer Value Analysis
# Prints the mean 'CLTV' for each 'Churn Label' value, followed by the
# describe() summary of the 'Churn Score' column, both formatted to two
# decimal places.
print("\n=== CUSTOMER VALUE ANALYSIS ===")

print("\nAverage CLTV by Churn Status:")
mean_cltv_by_status = data.groupby('Churn Label')['CLTV'].mean()
print(mean_cltv_by_status.apply(lambda value: f"${value:.2f}"))

print("\nChurn Score Statistics:")
churn_score_summary = data['Churn Score'].describe()
print(churn_score_summary.apply(lambda value: f"{value:.2f}"))