Spaces:
Sleeping
Sleeping
Alexis Galvis
committed on
Commit
·
d790594
1
Parent(s):
84960aa
app
Browse files- app.py +96 -0
- data/output/loan_scores.pkl +3 -0
- models/loan_model_2.h5 +3 -0
- models/preprocessor.pkl +3 -0
- requirements.txt +0 -0
- utils/__pycache__/calculate_probability_prediction.cpython-310.pyc +0 -0
- utils/__pycache__/category_classification.cpython-310.pyc +0 -0
- utils/__pycache__/create_and_save_plot.cpython-310.pyc +0 -0
- utils/calculate_probability_prediction.py +48 -0
- utils/category_classification.py +15 -0
- utils/create_and_save_plot.py +21 -0
app.py
ADDED
@@ -0,0 +1,96 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import streamlit as st
|
3 |
+
import numpy as np
|
4 |
+
import tensorflow as tf
|
5 |
+
import joblib
|
6 |
+
import pandas as pd
|
7 |
+
|
8 |
+
from utils.create_and_save_plot import plot_credit_score_distribution
|
9 |
+
from utils.calculate_probability_prediction import probability_to_score_v3
|
10 |
+
from utils.category_classification import credit_score_range_classification
|
11 |
+
|
12 |
+
# Load the trained Keras model and the fitted preprocessor.
#
# Streamlit re-executes this script from top to bottom on every widget
# interaction, so an uncached load would re-read both artifacts from disk on
# each rerun. ``st.cache_resource`` keeps a single shared instance per server
# process instead.
@st.cache_resource
def _load_model_and_preprocessor():
    """Return the ``(model, preprocessor)`` pair loaded from ``models/``."""
    loaded_model = tf.keras.models.load_model("models/loan_model_2.h5")
    loaded_preprocessor = joblib.load("models/preprocessor.pkl")
    return loaded_model, loaded_preprocessor


model, preprocessor = _load_model_and_preprocessor()
|
15 |
+
|
16 |
+
|
17 |
+
def calculate_dti(annual_income: float, total_monthly_debt: float) -> float:
    """Compute the debt-to-income ratio (DTI) as a percentage.

    Args:
        annual_income: Yearly income; it is divided by 12 to obtain the
            monthly income used as the denominator.
        total_monthly_debt: Sum of the monthly debt payments.

    Returns:
        ``total_monthly_debt / monthly_income * 100`` rounded to two decimals,
        or ``0.0`` when ``annual_income`` is not strictly positive (the ratio
        is undefined in that case).
    """
    if annual_income > 0:
        monthly_income = annual_income / 12
        dti = (total_monthly_debt / monthly_income) * 100
        return round(dti, 2)
    # Return a float here too so callers always receive the same type
    # regardless of which branch ran.
    return 0.0
|
24 |
+
|
25 |
+
|
26 |
+
# Streamlit interface
@st.cache_data
def _load_reference_scores():
    """Load the stored scores that are drawn as the histogram background.

    Cached because the script reruns on every interaction and the file does
    not change while the app is running.
    """
    return joblib.load(os.path.join('data', 'output', 'loan_scores.pkl'))


st.title("Predicción de Puntaje de Crédito")

st.sidebar.header("Ingrese los valores del préstamo")

# Input widgets, each with a short help text
annual_inc = st.sidebar.number_input(
    "Ingreso Anual (USD)", min_value=0.0, value=36000.0,
    help="(float) Ingreso anual en dólares antes de impuestos."
)
emp_length = st.sidebar.selectbox(
    "Tiempo en el Trabajo", ["< 1 year", "1-5 years", "6-10 years", "10+ years"],
    help="(categoría) Duración del empleo actual."
)
home_ownership = st.sidebar.selectbox(
    "Tipo de Propiedad", ["OWN", "MORTGAGE", "RENT"],
    help="(categoría) Tipo de propiedad del solicitante."
)
purpose = st.sidebar.selectbox(
    "Propósito del Préstamo", ["debt_consolidation", "credit_card", "home_improvement"],
    help="(categoría) Razón principal del préstamo."
)
# NOTE(review): min_value=10000 rejects ZIP codes that start with 0 once they
# are entered as integers (e.g. 02134 -> 2134). Confirm against how zip_code
# was encoded when the preprocessor was fitted before relaxing this bound.
zip_code = st.sidebar.number_input(
    "Código Postal", min_value=10000, max_value=99999, step=1, value=90210,
    help="(int) Código postal de residencia del solicitante."
)
open_acc = st.sidebar.number_input(
    "Cuentas Abiertas", min_value=0, step=1, value=5,
    help="(int) Número total de cuentas de crédito abiertas."
)
total_monthly_debt = st.sidebar.number_input(
    "Pagos Mensuales de Deuda (USD)", min_value=0.0, value=600.0,
    help="(float) Total de pagos mensuales de deuda (préstamos, tarjetas, hipotecas)."
)

# The DTI is derived from the inputs and shown immediately in the sidebar
dti = calculate_dti(annual_inc, total_monthly_debt)
st.sidebar.write(f"DTI Calculado: {dti}%")

if st.sidebar.button("Predecir"):
    # Build a single-row DataFrame from the sidebar inputs
    input_data = {
        "annual_inc": [annual_inc],
        "emp_length": [emp_length],
        "home_ownership": [home_ownership],
        "purpose": [purpose],
        "zip_code": [zip_code],
        "open_acc": [open_acc],
        "dti": [dti],
    }
    df = pd.DataFrame(input_data)

    # Preprocess, predict, and convert the first prediction into a score
    data_processed = preprocessor.transform(df)
    predictions = model.predict(data_processed).ravel()
    y_scores = _load_reference_scores()
    credit_score = probability_to_score_v3(predictions[0])

    # Show the result; both interpolated values are produced by this app
    # (a fixed colour name and a formatted number), not typed by the user.
    color = credit_score_range_classification(credit_score)
    st.markdown(
        f'<h2 style="color:{color};">Puntaje de Crédito Estimado: {credit_score:.2f}</h2>',
        unsafe_allow_html=True
    )

    # Render the distribution plot; the context manager closes the buffer
    # even if reading it fails.
    with plot_credit_score_distribution(y_scores, credit_score) as buffer:
        image_data = buffer.getvalue()

    st.image(image_data, caption="Distribución de Puntajes de Crédito", use_container_width=True)
|
data/output/loan_scores.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:67b1ef0919f5cff5e4732fa33c14334d830acf50b622c32bd44a63f8e4f5f98c
|
3 |
+
size 474033
|
models/loan_model_2.h5
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:24adab470382c8e80311421fe2b6fbd1cf73d665888a93b3a47303f55bd9a3e7
|
3 |
+
size 708384
|
models/preprocessor.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9c2d49b06300e354172e421ff3e97c3c9fcc4cdafad346ad64e5da1275478b55
|
3 |
+
size 5130
|
requirements.txt
ADDED
Binary file (454 Bytes). View file
|
|
utils/__pycache__/calculate_probability_prediction.cpython-310.pyc
ADDED
Binary file (1.27 kB). View file
|
|
utils/__pycache__/category_classification.cpython-310.pyc
ADDED
Binary file (528 Bytes). View file
|
|
utils/__pycache__/create_and_save_plot.cpython-310.pyc
ADDED
Binary file (964 Bytes). View file
|
|
utils/calculate_probability_prediction.py
ADDED
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
|
3 |
+
|
4 |
+
def probability_to_score_v3(prob, base_score=300, max_score=850, threshold=0.326,
                            expansion_factor_low=3, expansion_factor_high=0.7,
                            mid_score=550):
    """Convert a default probability into a credit score.

    The probability is inverted (so that a higher value means a better
    customer) and mapped piecewise around ``threshold``: values on the good
    side land in ``[mid_score, max_score]`` and values on the bad side land in
    ``[base_score, mid_score)``. Each side is reshaped with a power function.

    Args:
        prob (float): Probability of default. Values outside ``[0, 1]`` are
            clamped to that interval before any arithmetic.
        base_score (int): Lowest score that can be returned.
        max_score (int): Highest score that can be returned.
        threshold (float): Cut-off probability separating good from bad
            customers; ``prob == threshold`` maps to ``mid_score``.
        expansion_factor_low (float): Exponent applied on the bad side.
        expansion_factor_high (float): Exponent applied on the good side.
        mid_score (float): Score assigned at the threshold. Defaults to 550,
            the value that was previously hard-coded in both branches.

    Returns:
        float: Score clipped to ``[base_score, max_score]``.
    """
    # Clamp first: a negative base raised to a fractional exponent yields a
    # complex number in Python, which the final clip cannot handle.
    prob = min(max(prob, 0.0), 1.0)

    # Invert so that a larger value corresponds to a better score
    inverted_prob = 1 - prob
    inverted_threshold = 1 - threshold

    if inverted_prob >= inverted_threshold:  # good customers
        good_span = 1 - inverted_threshold
        if good_span > 0:
            normalized = (inverted_prob - inverted_threshold) / good_span
        else:
            # threshold <= 0 leaves no room above the cut-off; treat the
            # input as sitting at the top of the range instead of dividing
            # by zero.
            normalized = 1.0
        transformed = normalized ** expansion_factor_high
        score = mid_score + (max_score - mid_score) * transformed
    else:  # bad customers
        # inverted_threshold > inverted_prob >= 0 here, so it is non-zero
        normalized = inverted_prob / inverted_threshold
        transformed = normalized ** expansion_factor_low
        score = base_score + (mid_score - base_score) * transformed

    # Keep the result inside the allowed range
    score = np.clip(score, base_score, max_score)

    return score
|
utils/category_classification.py
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
def credit_score_range_classification(credit_score):
    """Return the display colour name for a credit score.

    Bands (lower bound inclusive, upper bound exclusive):

    * below 580  -> ``"red"``
    * 580 to 670 -> ``"orange"``
    * 670 to 740 -> ``"yellow"``
    * 740 to 800 -> ``"lightgreen"``
    * 800 and up -> ``"green"``

    The bands are contiguous so that fractional scores such as 579.5 are
    classified with the band below the next integer boundary instead of
    falling through to the top colour.

    Args:
        credit_score: Numeric score (int or float).

    Returns:
        str: One of the colour names listed above.
    """
    if credit_score < 580:
        return "red"
    if credit_score < 670:
        return "orange"
    if credit_score < 740:
        return "yellow"
    if credit_score < 800:
        return "lightgreen"
    return "green"
|
utils/create_and_save_plot.py
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import matplotlib.pyplot as plt
|
2 |
+
from io import BytesIO
|
3 |
+
|
4 |
+
def plot_credit_score_distribution(scores, point):
    """Draw a histogram of ``scores`` with ``point`` highlighted, as a PNG.

    Args:
        scores: Collection of scores for the histogram (30 bins, log-scaled
            y axis).
        point: Numeric score to highlight with a dashed vertical line and a
            marker at y=1.

    Returns:
        io.BytesIO: Buffer holding the PNG image, positioned at offset 0.
        The caller is responsible for closing it.
    """
    # Work on an explicit Figure/Axes instead of pyplot's implicit "current
    # figure" so the drawing calls cannot land on a figure created elsewhere.
    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        ax.hist(scores, bins=30, color='green', alpha=0.6)
        ax.set_yscale('log')
        # Format the legend value so a raw float is not printed in full
        ax.axvline(x=point, color='red', linestyle='--', label=f'Puntaje {point:.2f}')
        ax.scatter(point, 1, color='red', s=100, zorder=5)
        ax.set_title("Distribución de Puntajes de Crédito (300-850)")
        ax.set_xlabel("Puntaje")
        ax.set_ylabel("Frecuencia (escala logarítmica)")
        ax.grid(True)
        ax.legend()

        buffer = BytesIO()
        fig.savefig(buffer, format='png')
        buffer.seek(0)  # rewind so readers start at the beginning
        return buffer
    finally:
        # Always release the figure, even when drawing or saving fails
        plt.close(fig)
|