Display the setting codes
# Required packages----
library (tidyverse)
library (lubridate)
library (gtsummary)
library (labelled)
library (sf)
library (showtext)
library (extrafont)
library (wesanderson)
# Graphs----
font_main = font_title = 'Times New Roman'
extrafont:: loadfonts (quiet = T)
face_text= 'plain'
face_title= 'plain'
size_title = 14
size_text = 11
legend_size = 11
global_theme <- function () {
theme_minimal () %+replace%
theme (
text = element_text (family = font_main, size = size_text, face = face_text),
legend.text = element_text (family = font_main, size = legend_size),
axis.text = element_text (size = size_text, face = face_text),
plot.title = element_text (
family = font_title,
size = size_title,
hjust = 0.5
),
plot.subtitle = element_text (hjust = 0.5 )
)
}
# Colours
colors_ <- wes_palette ('Rushmore1' )
col_seine <- "#2140A3"
The spatial information of the properties is given at the IRIS level. However, in some IRIS, as shown in Figure 2.1 , there are no observations. We will therefore use a spatial smoothing to impute values in all IRIS.
Load Data
Let us load the data obtained in Chapter 1 .
# Real Estate
load ("../data/data_clean_all.rda" )
# Maps files
load ("../data/shapes.rda" )
Display the codes used to create the Figure
shapes_paris |>
left_join (
data_clean_all |>
group_by (CODE_IRIS) |>
summarise (
total_observations = n (),
median_price = median (contract_final_net_price)
),
by = "CODE_IRIS"
) %>%
replace_na (list (total_observations = 0 )) |>
ggplot () +
geom_sf (aes (fill = total_observations)) +
scale_fill_gradientn (
colours = rev (terrain.colors (10 )),
name = 'No. Observations' ,
position = 'bottom'
) +
geom_sf (
data = shapes_seine,
fill = col_seine
) +
global_theme ()
Defining Neighbors
The size of the area of each IRIS differs considerably. Hence, smoothing the observation by simply defining higher levels by Euclidean distance from a given point might pose problems, as it could include many dense but heterogeneous regions on one end of the spectrum or only a few on the other. Instead, we will define a neighborhood graph (see the paper for a formal definition).
We will hereafter denote \(i\) and \(j\) the polygons of the ith and jth IRIS, respectively.
The neighborhood graph can be represented by an \(n\times n\) matrix named an adjacency matrix, denoted \(A\) .If regions \(i\) and \(j\) intersect, \(A_{i,j}=1\) , and \(A_{i,j}=0\) otherwise. Representing neighborhood relations using an adjacency matrix has the advantage that non-intermediate connections can be easily obtained by successively multiplying the adjacency matrix by itself. That is, all nonzero elements of \(A^2\) represent neighborhoods that are either immediately adjacent to a given region (the direct neighbors) or are adjacent to the direct neighbors (the neighbors of the direct neighbors). This process can be repeated \(n\) times to obtain neighbors that can be reached within an \(n\) length path. This provides a more natural way to define neighborhoods, as with each increasing path length, an entire homogeneous region is added to the higher-level aggregation.
Let us construct a neighborhood graphs with R, using {igraph}.
Here we need the Seine River as well. Otherwise, we cannot find neighbors.
shapes_all <-
shapes_paris |>
bind_rows (shapes_seine)
Let us get the first level:
shapes_neighbours_export <- st_intersects (shapes_all, shapes_all)
The \(A_1\) adjacency matrix can then be constructed. We initialize it with 0s.
adj_matrix_lev_1 <- matrix (
nrow = length (shapes_neighbours_export),
ncol = length (shapes_neighbours_export),
0
)
Then, we loop over all polygons and change the value of \(A_1(i,j)\) to 1 if regions \(i\) and \(j\) are neighbors:
row_index <- 1
for (item in shapes_neighbours_export) {
for (col_idx in item) {
adj_matrix_lev_1[row_index, col_idx] = 1
}
row_index <- row_index + 1
}
Then, by multiplying this square matrix by itself, we can obtain the neighbors and the neighbors of neighbors:
adj_matrix_lev_2 <- adj_matrix_lev_1 %*% adj_matrix_lev_1
adj_matrix_lev_2[which (adj_matrix_lev_2 > 0 )] <- 1
Let us focus on a particular IRIS and visualize it and its neighbors on a map. We will produce three maps: one which only shows the particular IRIS, another one which also its immediate neighbors, and a third one which shows the neighbors immediately adjacent or adjacent to the direct neighbor.
target_community <- "Enfants Rouges 4"
The map with the single target IRIS:
single_munip <-
shapes_paris %>%
rowid_to_column ('index_ids' ) |>
filter (grepl ('Paris' , NOM_COM)) |>
mutate (
Classification = if_else (
NOM_IRIS == target_community,
'target community' ,
'others' )
) |>
mutate (
Classification = factor (
Classification,
levels= c ('target community' , 'neighbours' , 'others' )
)
) |>
ggplot () +
geom_sf (aes (fill = Classification)) +
scale_fill_manual (
values = c (colors_[c (3 )], '#D21F3C' , colors_[c (1 )]),
limits = c ('target community' , 'neighbours' , 'others' ),
name = ''
) +
geom_sf (
data = shapes_seine,
fill = col_seine
) +
global_theme () +
theme (legend.position = "bottom" )
The map with the immediate neighbors:
one_neigh <- shapes_paris |>
rowid_to_column ('index_ids' ) |>
filter (grepl ('Paris' , NOM_COM)) |>
mutate (
Classification = if_else (
index_ids %in% which (
adj_matrix_lev_1[which (shapes_paris$ NOM_IRIS == target_community),] == 1
),
'neighbours' , 'others' )
) |>
mutate (
Classification = if_else (
index_ids == which (shapes_paris$ NOM_IRIS == target_community),
'target community' , Classification
)
) |>
mutate (
Classification = factor (
Classification,
levels= c ('target community' , 'neighbours' , 'others' )
)
) |>
ggplot () +
geom_sf (aes (fill = Classification)) +
scale_fill_manual (values = c (colors_[c (3 )], '#D21F3C' , colors_[c (1 )])) +
geom_sf (
data = shapes_seine,
fill = col_seine
) +
global_theme ()
Lastly, the third map with the both immediate neighbors and neighbors of neighbors:
two_neigh <-
shapes_paris |>
rowid_to_column ('index_ids' ) |>
filter (grepl ('Paris' , NOM_COM)) |>
mutate (
Classification = if_else (
index_ids %in% which (
adj_matrix_lev_2[which (shapes_paris$ NOM_IRIS == target_community),] > 0
),
'neighbours' ,
'others'
)
)|>
mutate (
Classification = if_else (
index_ids == which (shapes_paris$ NOM_IRIS == target_community),
'target community' ,
Classification
)
) |>
mutate (
Classification = factor (
Classification,
levels= c ('target community' , 'neighbours' , 'others' )
)
) |>
ggplot () +
geom_sf (aes (fill = Classification)) +
scale_fill_manual (values = c (colors_[c (3 )], '#D21F3C' , colors_[c (1 )])) +
geom_sf (
data = shapes_seine,
fill = col_seine
) +
global_theme ()
Figure 2.2 illustrates how the neighborhood set for a particular IRIS community can be calculated.
Display the codes used to create the Figure
p_example_neigh_row <- cowplot:: plot_grid (
single_munip + theme (legend.position= "none" ),
one_neigh + theme (legend.position= "none" ),
two_neigh + theme (legend.position= "none" ),
align = 'vh' ,
ncol = 3
)
p_example_neigh_legend <- cowplot:: get_legend (single_munip)
p_example_neigh <- cowplot:: plot_grid (
p_example_neigh_row, p_example_neigh_legend,
ncol = 1 , rel_heights = c (1 , .2 )
)
p_example_neigh
Spatial Price Smoothing
For a given variable observed at IRIS level \(x\) , the smoothed value for region \(r_i\) , denoted as \(\lambda_\omega(x_i)\) can be written as:
\[\begin{align}
\lambda_{\omega}(x_i) = \frac{1}{\sum_{j=1}^{n} {\omega}(r_i,r_j)}\sum_{j=1}^n {\omega}(r_i,r_j) x_i \enspace.
\end{align}
\tag{2.1}\] For example, let \(d(r_i, r_j)\) be the path length between regions \(r_i\) and \(r_j\) , then a simple way to define \(\omega(r_i,r_j)\) is: \[
\begin{align}\label{eq:weights}
\omega(r_i,r_j)=
\begin{cases}
\frac{1}{(1+d(r_i,r_j))^p},\quad & \text{if } d(i,j) \leq m \\
0, & \text{otherwise} \enspace,
\end{cases}
\end{align}
\tag{2.2}\] where \(p\) and \(m\) are hyperparameters to be selected, similar to the bandwidth operator.
We initiate the neighborhood matrix by putting the values we just computed:
neigh_matrix <- adj_matrix_lev_1
Then, we can increase the neighborhood distance \(m\) so that it takes values from 2 to 30 in steps of 1. This allows us to get each IRIS which are distant from one another by a value lower or equal to \(m\) .
# dir.create("../data/neighbours/all_neighbour_levels")
# cli::cli_progress_bar(total = length(seq(2,30)))
for (neigh_idst in seq (2 ,30 )) {
# Set neighbors
adj_matrix_next_level <- neigh_matrix %*% adj_matrix_lev_1
adj_matrix_next_level[which (adj_matrix_next_level > 0 )] <- 1
# Create Graph
graph_tmp <- graph_from_adjacency_matrix (adj_matrix_next_level)
export_string <- paste (
'../data/neighbours/all_neighbour_levels/neighbours_level_' ,
neigh_idst,
'.csv' ,
sep= ''
)
graph_tmp |>
as_data_frame () |>
mutate (access = neigh_idst) |>
write_csv (export_string)
# Reset and restart
neigh_matrix <- adj_matrix_next_level
# cli::cli_progress_update(set = which(neigh_idst == seq(2,30)))
}
We also export a tibble with a mapping between each IRIS and its row number in shapes_all
.
mapping_neighbours <-
shapes_all |>
rowid_to_column ('index_ids' ) |>
select (index_ids, CODE_IRIS) |>
as_tibble () |>
select (- c (geometry))
mapping_neighbours |>
write_csv ('../data/neighbours/mapping_neighbour_id_iris.csv' )
Now that we have obtained the adjacency matrices from \(m=\{1, 2, \ldots, 30\}\) , we can create a tibble which will contain the distance from each polygon to all the other polygons (provided the maximum distance is lower or equal to 30).
We first populate the desired object, all_neighbours
with all the IRIS which have a distance equal to 1:
all_neighbours <-
graph_from_adjacency_matrix (adj_matrix_lev_1) |>
as_data_frame () |>
tibble () |>
mutate (access = 1 )
all_neighbours
# A tibble: 7,559 × 3
from to access
<dbl> <dbl> <dbl>
1 1 1 1
2 1 220 1
3 1 555 1
4 1 671 1
5 1 831 1
6 1 860 1
7 2 2 1
8 2 115 1
9 2 173 1
10 2 457 1
# ℹ 7,549 more rows
Then, let us add, step by step, the IRIS which are neighbors with a distance lower or equal to 2, then to 3, and so on, until 30.
for (neigh_idst in seq (2 ,30 )) {
read_string <- paste (
'../data/neighbours/all_neighbour_levels/neighbours_level_' ,
neigh_idst,
'.csv' ,
sep = ''
)
neighbor_level <- read_csv (read_string, progress = FALSE )
# Add neighbors at current distance
all_neighbours <-
all_neighbours |>
bind_rows (neighbor_level)
}
Now, we would like to extract from the resulting object, the minimum distance from one IRIS to another, for all IRIS.
neighbours_all <- all_neighbours |>
group_by (from, to) |>
summarise (min_distance = min (access), .groups = "drop" ) |>
left_join (
mapping_neighbours,
by = c ('from' = 'index_ids' )
) |>
left_join (
mapping_neighbours,
by = c ('to' = 'index_ids' )
) |>
select (
from_iris = CODE_IRIS.x,
to_iris = CODE_IRIS.y,
distance = min_distance
)
The result can be saved:
neighbours_all |>
write_csv ('../data/neighbours/all_neighbours_paris.csv' )
neighbours_all <-
neighbours_all |>
mutate (
from_iris = as.character (from_iris),
to_iris = as.character (to_iris)
)
Here is the resulting matrix:
# A tibble: 974,169 × 3
from_iris to_iris distance
<chr> <chr> <dbl>
1 751145620 751145620 1
2 751145620 751145617 3
3 751145620 751135102 7
4 751145620 751156004 7
5 751145620 751124624 16
6 751145620 751051703 7
7 751145620 751083104 13
8 751145620 751166103 10
9 751145620 751145511 2
10 751145620 751145512 2
# ℹ 974,159 more rows
Let us compute the average selling price in each IRIS:
data_immo_agg <-
data_clean_all |>
group_by (CODE_IRIS) |>
summarise (mean_sale_price = mean (pm2))
Let us use this to plot the observed prices per square meter, on a choropleth map. We will display the unsmoothed version on the left and the smoothed version on the right. The smoothed version is computed using Equation 2.1 . We will use here a distance \(m=5\) .
m <- 5
smooth_prices <-
neighbours_all |>
mutate (distance = distance + 1 ) |>
mutate (distance = if_else (from_iris == to_iris, 1 , distance)) |>
filter (distance <= !! m) |>
left_join (
data_immo_agg,
by = c ('to_iris' = 'CODE_IRIS' )
) |>
mutate (inverse_weight = 1 / distance) |>
mutate (weighted_val = mean_sale_price * inverse_weight) |>
drop_na () |>
group_by (from_iris) |>
summarise (
total_weights = sum (inverse_weight),
total_val = sum (weighted_val)
) |>
mutate (smoothed_val = total_val / total_weights) |>
ungroup () |>
mutate (CODE_IRIS = from_iris) |>
select (CODE_IRIS, smoothed_price = smoothed_val)
Display the codes to create the Figure
# Unsmoothed version
p_1 <-
shapes_paris |>
left_join (
data_clean_all |>
group_by (CODE_IRIS) |>
summarise (
total_observations = n (),
median_price = mean (pm2),
std_price = sd (contract_final_net_price)
),
by = "CODE_IRIS"
) |>
replace_na (list (total_observations = 0 )) |>
ggplot () +
geom_sf (aes (fill = median_price)) +
scale_fill_gradientn (
colours = rev (terrain.colors (10 )),
limits = c (5000 ,20000 ),
name = 'Median Price' ,
position = 'bottom' ) +
geom_sf (
data = shapes_seine,
fill = col_seine
) +
global_theme () +
guides (
fill = guide_colorbar (
title = "Mean prices per IRIS" ,
position = 'bottom' ,
title.position = "top" ,
title.vjust = 0 ,
# draw border around the legend
frame.colour = "black" ,
barwidth = 10 ,
barheight = 1.5
)
)
# Smoothed version
p_2 <-
shapes_paris |>
left_join (smooth_prices, by = "CODE_IRIS" ) |>
ggplot () +
geom_sf (aes (fill = smoothed_price)) +
scale_fill_gradientn (
colours = rev (terrain.colors (10 )),
position = 'bottom' ,
labels = scales:: comma
) +
geom_sf (
data = shapes_seine,
fill = col_seine
) +
global_theme ()
First, we compute the median observed price in each IRIS of the dataset, using the raw values. Then, we add the smoothed prices computed earlier (using \(m=\) 5), for each IRIS. Then, we aggregate the values at the arrondissement level (column NOM_COM
in the map data): we compute the average of the median IRIS prices and the average of the smoothed version.
data_smooth_arrond <-
data_clean_all |>
mutate (CODE_IRIS = as.character (iris_cog)) |>
group_by (CODE_IRIS) |>
summarise (
median_qmp = median (pm2)
) |>
left_join (
smooth_prices |>
mutate (smoothed_qmp = smoothed_price),
by = "CODE_IRIS"
) |>
left_join (
shapes_paris |>
as_tibble () |>
select (CODE_IRIS, NOM_COM),
by = "CODE_IRIS"
) |>
group_by (NOM_COM) |>
summarise (
median_qmp = mean (median_qmp),
smoothed_qmp = mean (smoothed_qmp),
.groups = "drop"
)
The values are reported in Table 2.1 .
Display the codes used to create the Table
data_smooth_arrond |>
mutate (
NOM_COM = factor (
NOM_COM,
levels = str_c (
"Paris " , 1 : 20 , "e" , c ("r" , rep ("" , 19 )), " Arrondissement" )
)
) |>
arrange (NOM_COM) |>
knitr:: kable (
booktabs = TRUE ,
col.names = c ("Arrondissement" , "Average of Median" , "Average of Smooth" )
)
Table 2.1: Aggregated observed prices at the arrondissement level.
Paris 1er Arrondissement
12695.811
12483.324
Paris 2e Arrondissement
11724.438
11669.543
Paris 3e Arrondissement
12525.939
11654.372
Paris 4e Arrondissement
13216.678
12315.130
Paris 5e Arrondissement
12658.348
12068.673
Paris 6e Arrondissement
14251.791
13327.016
Paris 7e Arrondissement
14233.646
13141.138
Paris 8e Arrondissement
12087.873
11528.212
Paris 9e Arrondissement
11237.138
10849.159
Paris 10e Arrondissement
10114.682
10124.534
Paris 11e Arrondissement
10271.923
10267.033
Paris 12e Arrondissement
9700.242
9744.037
Paris 13e Arrondissement
8999.515
9521.970
Paris 14e Arrondissement
10301.420
10464.133
Paris 15e Arrondissement
10283.063
10691.530
Paris 16e Arrondissement
11244.938
11429.151
Paris 17e Arrondissement
10845.312
10917.631
Paris 18e Arrondissement
9501.718
9670.950
Paris 19e Arrondissement
8334.067
8566.154
Paris 20e Arrondissement
8663.208
8915.073
Let us now visualize these values on two choropleth map, one for each aggregated method (Figure 2.3 ).
Display the codes used to create the Figure
p_1 <- shapes_paris |>
group_by (NOM_COM) |>
summarise (
arrond_shape = st_combine (geometry)
) |>
left_join (data_smooth_arrond, by = "NOM_COM" ) |>
ggplot () +
geom_sf (aes (fill = median_qmp)) +
scale_fill_gradientn (
colours = rev (terrain.colors (10 )),
limits = c (5000 , 15000 ),
name = 'Price' ,
position = 'bottom' ,
labels = scales:: comma
) +
geom_sf (
data = shapes_seine,
fill = col_seine
) +
ggtitle ('Aggregated Raw' ) +
global_theme () +
guides (
fill = guide_colorbar (
title = "Mean prices per Arrondissement" ,
position= 'bottom' ,
title.position = "top" , title.vjust = 0 ,
frame.colour = "black" ,
barwidth = 15 ,
barheight = 1.5
)
)
p_2 <- shapes_paris |>
group_by (NOM_COM) |>
summarise (
arrond_shape = st_combine (geometry)
) |>
left_join (data_smooth_arrond, by = "NOM_COM" ) |>
ggplot () +
geom_sf (aes (fill = smoothed_qmp)) +
scale_fill_gradientn (
colours = rev (terrain.colors (10 )),
limits = c (5000 , 15000 ),
name = 'Price' ,
position = 'bottom' ,
labels = scales:: comma
) +
geom_sf (
data = shapes_seine,
fill = col_seine
) +
ggtitle ('Aggregated Smoothed' ) +
global_theme ()
ggpubr:: ggarrange (
p_1, p_2,
ncol = 2 ,nrow = 1 ,
legend = "bottom" ,
common.legend = TRUE
)
Relative Errors in Prices
Let us dig into the relative error between estimated prices and observed ones. First, we compute the mean relative difference between the observed and the estimated price, per IRIS. The is computed as follows \[\frac{z - \hat{z}}{z}\]
diffs_smooth <-
data_clean_all |>
filter (pm2 < 20000 ) |>
filter (pm2_estimated < 20000 ) |>
select (CODE_IRIS, pm2, difference_pm2) |>
mutate (relative_error = difference_pm2/ pm2) |>
group_by (CODE_IRIS) |>
summarise (
mean_relative_diff = mean (relative_error),
median_relative_diff = median (relative_error),
.groups = "drop"
)
There are two outliers, so we decide to trim the data.
cuts_ <- quantile (diffs_smooth$ mean_relative_diff, c (0.02 ,0.98 ))
diffs_smooth <-
diffs_smooth |>
filter (mean_relative_diff > cuts_[1 ]) |>
filter (mean_relative_diff < cuts_[2 ])
We use Equation 2.1 to compute the smoothed values, using \(m=\) 5.
smoothed_diff <-
neighbours_all |>
mutate (distance = distance + 1 ) |>
filter (distance <= !! m) |>
mutate (distance = if_else (from_iris == to_iris, 1 , distance)) |>
left_join (
diffs_smooth,
by = c ('to_iris' = 'CODE_IRIS' )
) |>
mutate (inverse_weight = 1 / distance) |>
mutate (value = mean_relative_diff * inverse_weight^ 2 ) |>
drop_na () |>
group_by (from_iris) |>
summarise (
total_weights = sum (inverse_weight),
total_value = sum (value)
) |>
mutate (smoothed_diff = total_value / total_weights) |>
ungroup () |>
mutate (CODE_IRIS = from_iris, smoothed_diff)
Before plotting the map with the values, we isolate some regions of interest that will be plotted on the maps as well and used as examples in paper.
two IRIS: Montmartre and Champs-de-Mars.
two arrondissements : the 12th and the 20th.
shape_champs <-
shapes_paris |>
filter (CODE_IRIS == '751072812' )
shape_mont <-
shapes_paris|>
filter (CODE_IRIS == '751093503' )
shape_12 <-
shapes_paris |>
filter (NOM_COM == 'Paris 12e Arrondissement' ) |>
summarise (shape = st_union (geometry))
shape_20 <-
shapes_paris |>
filter (NOM_COM == 'Paris 20e Arrondissement' ) |>
summarise (shape = st_union (geometry))
The relative errors per IRIS are shown in Figure 2.4 .
Display the codes used to create the Figure
scale_val <- seq (
min (smoothed_diff$ smoothed_diff),
max (smoothed_diff$ smoothed_diff),
length.out = 4
)
p <- shapes_paris |>
left_join (smoothed_diff, by = "CODE_IRIS" ) |>
ggplot () +
geom_sf (aes (fill = smoothed_diff)) +
geom_sf (
data = shapes_seine,
fill = col_seine
) +
geom_sf (
data = shape_champs,
color = 'black' ,
fill = alpha ('white' , 0 ),
lwd = 1 ,
lty = 'solid'
) +
geom_sf (
data = shape_mont,
color = 'black' ,
lwd = 1 ,
fill = alpha ('white' , 0 ),
lty = 'solid'
) +
geom_sf (
data = shape_12,
color = 'black' ,
lwd = 1 ,
fill = alpha ('white' , 0 ),
lty = 'dashed'
) +
geom_sf (
data = shape_20,
color = 'black' ,
lwd = 1 ,
fill = alpha ('white' , 0 ),
lty = 'dashed'
) +
global_theme () +
scale_fill_gradient2 (
NULL ,
midpoint = 0 ,
high = "#000090" ,
mid = "white" ,
low = "#CD3700" ,
breaks = scale_val,
labels = scales:: percent (scale_val)
) +
theme (
legend.position = 'bottom'
) +
ggtitle ('Smoothed relative error' )
panel_width <- unit (1 ,"npc" ) - sum (ggplotGrob (p)[["widths" ]][- 3 ])
p + guides (fill = guide_colorbar (barwidth = panel_width/ 2 ))
Number of observation per IRIS
Let us also show the number of observation, using the smoothed values. First, we smooth the number of observation in each IRIS:
smooth_n <-
neighbours_all |>
mutate (distance = distance + 1 ) |>
mutate (distance = if_else (from_iris == to_iris, 1 , distance)) |>
filter (distance <= !! m) |>
left_join (
data_clean_all |> count (CODE_IRIS, name = "n" ),
by = c ('to_iris' = 'CODE_IRIS' )
) |>
mutate (inverse_weight = 1 / distance) |>
mutate (weighted_val = n * inverse_weight) |>
drop_na () |>
group_by (from_iris) |>
summarise (
total_weights = sum (inverse_weight),
total_val = sum (weighted_val)
) |>
mutate (smoothed_n = total_val / total_weights) |>
ungroup () |>
mutate (CODE_IRIS = from_iris) |>
select (CODE_IRIS, smoothed_n = smoothed_n)
Then, we can plot the result (Figure 2.5 ).
Display the codes used to create the Figure
shapes_paris |>
left_join (smooth_n, by = "CODE_IRIS" ) |>
ggplot () +
geom_sf (aes (fill = smoothed_n)) +
scale_fill_gradientn (
colours = rev (terrain.colors (10 )),
name = 'No. Observations' ,
position = 'bottom'
) +
geom_sf (
data = shapes_seine,
fill = col_seine
) +
global_theme ()
Wealth Level per IRIS
Let us have a look at the wealth level per IRIS, using the smoothed version.
We load the income data per IRIS (see Section 1.4 in Chapter 1 ):
data_income <- read_delim (
str_c (
"../data/econ/" ,
"BASE_TD_FILO_DISP_IRIS_2020_CSV/BASE_TD_FILO_DISP_IRIS_2020.csv"
),
delim = ";" ,
escape_double = FALSE ,
trim_ws = TRUE
)
The data needs to be formatted to match with the other files used here.
median_inc_data <-
data_income |>
select (CODE_IRIS = IRIS, DISP_MED20) |>
mutate (DISP_MED20 = as.numeric (DISP_MED20))
Warning: There was 1 warning in `mutate()`.
ℹ In argument: `DISP_MED20 = as.numeric(DISP_MED20)`.
Caused by warning:
! NAs introduced by coercion
We can compute the smoothed values per IRIS.
smoothed_income <-
neighbours_all |>
mutate (distance = distance + 1 ) |>
mutate (distance = if_else (from_iris == to_iris, 1 , distance)) |>
filter (distance <= !! m) |>
left_join (
mapping_neighbours,
by= c ('to_iris' = 'CODE_IRIS' )
) |>
mutate (CODE_IRIS = as.character (to_iris)) |>
left_join (
median_inc_data,
by = 'CODE_IRIS'
) |>
mutate (inverse_weight = 1 / distance) |>
mutate (weighted_inc = DISP_MED20 * inverse_weight) |>
drop_na () |>
group_by (from_iris) |>
summarise (
total_weights = sum (inverse_weight),
total_inc = sum (weighted_inc)
) |>
mutate (smoothed_inc = total_inc / total_weights) |>
ungroup () |>
mutate (CODE_IRIS = from_iris) |>
select (CODE_IRIS, smoothed_income = smoothed_inc)
Then, we can plot the result (Figure 2.6 ).
Display the codes used to create the Figure
shapes_paris |>
left_join (smoothed_income, by = "CODE_IRIS" ) |>
ggplot () +
geom_sf (aes (fill = smoothed_income)) +
scale_fill_gradientn (
colours = rev (terrain.colors (10 )),
labels = scales:: comma
) +
geom_sf (
data = shapes_seine,
fill = col_seine
) +
global_theme () +
theme (legend.position = "bottom" ) +
guides (
fill = guide_colorbar (
title = "Smoothed Median Income per IRIS" ,
position= 'bottom' ,
title.position = "top" ,
title.vjust = 0 ,
frame.colour = "black" ,
barwidth = 15 ,
barheight = 1.5
)
)