#--- Step 2: Defining parameters and identifying areas ---
# Parameters for weighting
GAMMA = 0.5 # Softmax sharpness: >1 sharper, <1 smoother
ALPHA = 0.5 # Power transform for smoothing: <1 compresses high values
BASE_WEIGHT_UNINHABITED = 1e-9 # Tiny base weight for non-residential
# areas, used for visualisation only; it contributes no population,
# since the remainder function will not allocate any population
# to these areas.
WINSOR_THRESHOLD = 0.99 # Percentile to cap extreme weights
# Define inhabited vs. uninhabited areas based on a key variable
# Using 'illum_vol_density' is a robust choice.
uninhabited_mask = (df['illum_vol_density'] == 0)
inhabited_mask = ~uninhabited_mask
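# Optional sanity check (assumes df was built in Step 1 with an
# 'illum_vol_density' column): report the inhabited/uninhabited split.
n_total = len(df)
n_uninhabited = int(uninhabited_mask.sum())
print(f"Uninhabited cells: {n_uninhabited}/{n_total} "
      f"({n_uninhabited / n_total:.1%})")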
#--- Step 3: Calculating preliminary weights (w_pt) ---
def softmax_on_scaled_z(z_vals, gamma=1.0):
    """Calculate a softmax over min-max scaled values.

    gamma > 1 sharpens the distribution; gamma < 1 smooths it.
    """
    # Ensure we are working with a float NumPy array
    z = np.asarray(z_vals, dtype=float)
    # If every value is NaN, return an array of NaNs
    if np.isnan(z).all():
        return np.full_like(z, np.nan)
    # Compute min and max, ignoring NaNs
    z_min = np.nanmin(z)
    z_max = np.nanmax(z)
    # If the values are constant, fall back to uniform weights
    if z_max == z_min:
        return np.full_like(z, 1.0 / z.size)
    # Min-max scale to [0, 1], then apply the softmax
    z_scaled = (z - z_min) / (z_max - z_min)
    exps = np.exp(gamma * z_scaled)
    return exps / np.nansum(exps)
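# Toy demonstration (illustrative values, not from the dataset):
# gamma > 1 concentrates weight on the largest value; gamma < 1 flattens.
demo = softmax_on_scaled_z([0.0, 1.0, 2.0], gamma=2.0)
print(demo)        # ~[0.090, 0.245, 0.665]
print(demo.sum())  # 1.0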
# a. Calculate softmax weights for INHABITED areas
df['w_raw'] = np.nan
df.loc[inhabited_mask, 'w_raw'] = (
    df.loc[inhabited_mask]
    .groupby('Id')['Predicted_log_Y']
    .transform(lambda z: softmax_on_scaled_z(z.values, gamma=GAMMA))
)
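# Optional check: within each Id, the softmax weights of inhabited cells
# should sum to ~1 (rows with NaN predictions, if any, are skipped).
raw_sums = df.loc[inhabited_mask].groupby('Id')['w_raw'].sum()
assert np.allclose(raw_sums, 1.0), "w_raw should sum to 1 per Id"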
# b. Apply the alpha power transform to smooth the raw weights
df['w_pt'] = np.nan
df.loc[inhabited_mask, 'w_pt'] = df.loc[inhabited_mask, 'w_raw'] ** ALPHA
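# Illustration of the smoothing (toy numbers): a 9:1 ratio between two
# raw weights shrinks to 3:1 under the square-root transform (ALPHA = 0.5).
print(np.array([0.9, 0.1]) ** ALPHA)  # -> [0.9487, 0.3162]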
# c. Assign a tiny base weight to UNINHABITED areas
df.loc[uninhabited_mask, 'w_pt'] = BASE_WEIGHT_UNINHABITED
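# Behaviour note (toy numbers): in a mixed Id the base weight normalizes
# to a negligible share; in a fully uninhabited Id it becomes uniform.
w = np.array([0.7, 0.3, BASE_WEIGHT_UNINHABITED])
print(w / w.sum())  # uninhabited cell receives ~1e-9 of the Id's weight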
#--- Step 4: Normalizing, winsorizing, and finalizing weights ---
# a. First normalization to get initial per-Id weights
df['w_initial'] = df.groupby('Id')['w_pt'].transform(lambda w: w / w.sum())
# b. Winsorize (cap) the weights to handle outliers
threshold = df['w_initial'].quantile(WINSOR_THRESHOLD)
df['w_trim'] = np.minimum(df['w_initial'], threshold)
# c. Re-normalize within each Id: capping in step b can leave per-Id
# sums below 1, so the final disaggregation weights must be rescaled.
df['w_final_trim'] = df.groupby('Id')['w_trim'].transform(lambda w: w / w.sum())
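# Final sanity check: after re-normalization, the disaggregation weights
# within every Id should again sum to 1.
final_sums = df.groupby('Id')['w_final_trim'].sum()
assert np.allclose(final_sums, 1.0), "final weights must sum to 1 per Id"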