# Appendix C: Code for the transformation steps in MGWR population disaggregation

# The grid GeoDataFrame already contains the MGWR predictions of
# logged grid-level population density in df['Predicted_log_Y'].
df = gpd.read_file('data/cleaned/grid_data_for_weights_mgwr03.geojson')
#--- Step 2: Defining parameters and identifying areas ---

# Parameters for weighting
GAMMA = 0.5  # Softmax sharpness: >1 sharper, <1 smoother

ALPHA = 0.5  # Power transform for smoothing: <1 compresses high values

BASE_WEIGHT_UNINHABITED = 1e-9 # A very small base weight for
# non-residential areas, kept only for visualisation; it contributes
# no population because the remainder function will not allocate
# any population to these areas.

WINSOR_THRESHOLD = 0.99 # Percentile at which extreme weights are capped

# Define inhabited vs. uninhabited areas based on a key variable:
# grid cells with zero night-time-light volume density are treated
# as uninhabited.
uninhabited_mask = (df['illum_vol_density'] == 0)
habited_mask = ~uninhabited_mask
# NOTE(review): the line below mentions an epsilon smoothing parameter,
# but no epsilon appears in the softmax implementation — confirm intent.
#--- Step 3: Calculating preliminary weights (w_pt) ---

def softmax_on_scaled_z(z_vals, gamma=1.0):
    """Softmax of min-max scaled values.

    Parameters
    ----------
    z_vals : array-like of float
        Values to convert into weights. NaN entries are preserved
        (NaN in -> NaN out).
    gamma : float, optional
        Softmax sharpness: >1 sharper, <1 smoother.

    Returns
    -------
    numpy.ndarray
        Weights that sum to 1 over the non-NaN entries.
    """
    # ensure working with a NumPy array
    z = np.asarray(z_vals, dtype=float)
    nan_mask = np.isnan(z)
    # if all are nan, there is nothing to weight
    if nan_mask.all():
        return np.full_like(z, np.nan)
    # compute min and max ignoring nan
    z_min = np.nanmin(z)
    z_max = np.nanmax(z)
    if z_max == z_min:
        # Constant input: uniform weights over the VALID entries only.
        # (Previously 1/z.size was assigned everywhere, which gave NaN
        # inputs a weight and made the valid weights sum to < 1 —
        # inconsistent with the softmax branch below.)
        out = np.full_like(z, np.nan)
        out[~nan_mask] = 1.0 / (~nan_mask).sum()
        return out
    # min–max scale to [0, 1] so gamma has a consistent effect
    # regardless of the raw magnitude of z
    z_scaled = (z - z_min) / (z_max - z_min)
    exps = np.exp(gamma * z_scaled)
    # nansum: NaN inputs stay NaN in exps and are excluded from the total
    return exps / np.nansum(exps)

# a. Calculate softmax weights for HABITED areas, per district ('Id'),
# so weights are comparable within each disaggregation unit.
# (The original listing split this statement across lines without a
# continuation — a SyntaxError — fixed by chaining inside one expression.)
df['w_raw'] = np.nan
df.loc[habited_mask, 'w_raw'] = (
    df.loc[habited_mask]
    .groupby('Id')['Predicted_log_Y']
    .transform(lambda z: softmax_on_scaled_z(z.values, gamma=GAMMA))
)

# b. Apply the alpha power transform (<1) to compress large raw weights
df['w_pt'] = np.nan
df.loc[habited_mask, 'w_pt'] = df.loc[habited_mask, 'w_raw'] ** ALPHA

# c. Assign a tiny base weight to UNINHABITED areas — visible on maps,
# but effectively allocating no population after normalization
df.loc[uninhabited_mask, 'w_pt'] = BASE_WEIGHT_UNINHABITED
#-- Step 4: Normalizing, winsorizing, and finalizing weights ---

# a. First normalization within each district ('Id') so weights sum to 1.
# (The original listing broke the chained call after a trailing dot —
# a SyntaxError — fixed by keeping the chain on one statement.)
df['w_initial'] = df.groupby('Id')['w_pt'].transform(lambda w: w / w.sum())

# b. Winsorize (cap) the weights at the global WINSOR_THRESHOLD
# percentile to limit the influence of extreme cells
threshold = df['w_initial'].quantile(WINSOR_THRESHOLD)
df['w_trim'] = np.minimum(df['w_initial'], threshold)

# c. Re-normalize within each district so the trimmed weights again
# sum to 1; these are the final weights used for disaggregation
df['w_final_trim'] = df.groupby('Id')['w_trim'].transform(lambda w: w / w.sum())