---
title: "Reaching_the_Voters_Supplementary_Material"
author: "Jasmin Fitzpatrick and Felix-Christopher von Nostitz"
format: html
code-fold: true
embed-resources: true
editor: visual 
---


- R version 4.4.1 (2024-06-14 ucrt) -- "Race for Your Life"; Copyright (C) 2024 The R Foundation for Statistical Computing
- Data retrieval: export via the transparency library: <https://adstransparency.google.com/political?region=DE&topic=political>
- Original export date: 22/10/2021, 19:36 CET
- Working file: `google-political-ads-creative-stats.csv`
- Additional information: <https://support.google.com/adspolicy/answer/6014595?hl=en#zippy=>

```{r}
# Preparation of the source data: import the raw export.
# Package readr version 2.1.5 documented via https://readr.tidyverse.org/
# (readr is attached here, but the import itself uses base read.csv()).
library(readr)
#### the source is real (original, unmodified export)
real.data <- read.csv("google-political-ads-creative-stats.csv", sep = ",")
# View() opens a GUI data viewer; only call it in an interactive session so
# that rendering the document non-interactively does not fail on it.
if (interactive()) View(real.data)
```

```{r}
#### creating working file
# All following preparation steps modify this copy; real.data itself is not
# changed by the code below and remains available as the raw import.
data.prep <- real.data
```

```{r}
#### Adjusting the observation period
# Remove the dashes from both date columns so each date becomes a sortable
# code (presumably YYYYMMDD - the raw values are not visible here). The
# stripped strings replace the original columns and are carried forward.
data.prep$Date_Range_Start <- gsub("-", "", data.prep$Date_Range_Start)
data.prep$Date_Range_End <- gsub("-", "", data.prep$Date_Range_End)

# Compare as integers. Comparing the character columns directly with a number
# would silently coerce the number to a string and compare by collation order.
# Kept: ads that started after 2021-07-26 and ended before 2021-09-27.
data.prep01 <- subset(
  data.prep,
  as.integer(Date_Range_Start) > 20210726L &
    as.integer(Date_Range_End) < 20210927L
)
if (interactive()) View(data.prep01)

#### Adjusting geo area to DE for Germany
# First drop US-only rows, then keep exactly the rows whose region string is
# "DE, EU".
data.prep02 <- subset(data.prep01, Regions != "US")
if (interactive()) View(data.prep02)
data.DE <- subset(data.prep02, Regions == "DE, EU")
if (interactive()) View(data.DE) # 5728 obs
```

```{r}
#### Cleaning: remove individual and non-party advertisers
# The exclusion was originally carried out one advertiser at a time. Each
# entry below names the advertiser, gives the justification for its exclusion,
# and records the number of observations that remained after that step (as
# documented by the authors, in the original step order).
excluded_advertisers <- c(
  "Sebastian Münzenmaier", # AfD politician; 5699 obs
  "Urban Media GmbH", # publisher; 5695 obs
  "Sabine Buder", # CDU politician; 5687 obs
  "Jorinde Schulz", # LEFT politician; 5684 obs
  "Julius Kröger", # (Jusos Aachen?); 5683 obs
  "Mareen Theil", # spokesperson for MP Heilmann, CDU; 5680 obs
  "Lucas Brandes", # intern for MP Frank Oelsterhelweg, CDU; 5663 obs
  "Campact e.V.", # NGO supporting democracy and social justice, mainly through petitions; 5629 obs
  "Karsten Scherschanski", # FDP politician; 5619 obs
  "Verlag der Tagesspiegel GmbH", # media; 5565 obs
  "Katharina Pötter", # CDU politician; 5563 obs
  "Peter Stein", # CDU politician; 5550 obs
  "Verena Ute Hubertz", # SPD politician; 5539 obs
  "Zweites Deutsches Fernsehen", # media; 5534 obs
  "Dr. Franka Kretschmer", # independent candidate for Magdeburg; 5533 obs
  "Florian Fuchs", # CDU politician; 5531 obs
  "Katharina Bartsch", # GREEN politician; 5529 obs
  "Timo Schisanowski", # SPD politician; 5458 obs
  "Daniel Föst", # FDP politician; 5453 obs
  "Jan-Philipp Knoop", # CDU politician; 5451 obs
  "Jens MATHIAS STEIN", # SPD politician; 5427 obs
  "Volker Brauer", # SPD politician; 5423 obs
  "Hans-Böckler-Stiftung", # political foundation; 5422 obs
  "Philipp da Cunha", # SPD politician; 5421 obs
  "DEMOCRACY Deutschland e.V.", # NGO supporting democracy; 5401 obs
  "Dr. Gerold Eberhard Papsch", # CDU politician; 5399 obs
  "Thore Stein", # AfD politician; 5396 obs
  "Kai Whittaker", # CDU politician; 5386 obs
  "Die BürgerNahen e.V.", # local candidates for municipal election; 5385 obs
  "AfD-Fraktion im Sächs. Landtag", # party in parliament; 5384 obs
  "Ulrika Schöllner", # independent candidate for Leipzig; 5383 obs
  "FDP-Fraktion im Deutschen Bundestag", # party in parliament; 5380 obs
  "Isabell Charlotte Steidel", # GREEN politician; 5373 obs
  "Niklas Teßmann", # independent candidate; 5371 obs
  "Sören Pellmann", # LEFT politician; 5369 obs
  "David Hess", # ?; 5344 obs
  "Verlag Lensing-Wolff GmbH & Co. KG", # media; 5324 obs
  "SPD-Fraktion im Bundestag", # party in parliament; 5323 obs
  "Ann-Veruschka Jurisch", # FDP politician; 5318 obs
  "Kai Koeser", # SPD politician; 5306 obs
  "Henrik Werner", # GREEN politician; 5305 obs
  "Cansel Kiziltepe", # SPD politician; 5303 obs
  "Marcus Carsten Fuchs", # independent candidate Sachsen; 5295 obs
  "Florian Westerwalbesloh", # SPD politician; 5294 obs
  "The ONE Campaign gGmbH", # campaign agency; 5293 obs
  "campaigners Network GmbH", # campaign agency; 5291 obs
  "Neue Wege für Berlin e.V.", # NGO supporting change in Berlin; 5283 obs
  "Andrew Ullmann", # FDP politician; 5273 obs
  "Volker Mayer-Lay", # CDU politician; 5272 obs
  "Christina Daniela Stumpp", # CDU politician; 5269 obs
  "Dirk Stamer", # SPD politician; 5267 obs
  "Roman Wick", # (?); 5264 obs
  "Joachim Horst Krämer", # FDP politician; 5261 obs
  "Saskia Ludwig", # CDU politician; 5248 obs
  "Julia Söhne", # SPD politician; 5239 obs
  "Thomas Bareiß", # CDU politician; 5236 obs
  "Franziska Hoppermann", # CDU politician; 5234 obs
  "Konrad-Adenauer-Stiftung e.V.", # political foundation; 5229 obs
  "Dr. Jens Zimmermann", # SPD politician; 5227 obs
  "DIE FAMILIENUNTERNEHMER e. V.", # lobbying for family-owned businesses; 5218 obs
  "Zentrum Liberale Moderne", # think tank; 5217 obs
  "Marcus Finselberger", # CDU politician; 5214 obs
  "Alexander Engelhard", # CSU politician; 5213 obs
  "Petra Nicolaisen", # CDU politician; 5197 obs
  "Ronald Härtlein", # Republik in Bewegung politician; 5190 obs
  "Thomas Knott", # Mittelstandsinitiative (?); 5180 obs
  "Oliver Sieke", # CDU politician; 5166 obs
  "Matthias Katsch", # SPD politician; 5161 obs
  "Matthias Hauer", # CDU politician; 5159 obs
  "Susanne Ferschl", # LEFT politician; 5158 obs
  "Forum für politischen und gesellschaftlichen Informationsaustausch", # think tank; 5155 obs
  "Jens Teutrine", # FDP politician; 5148 obs
  "AfD Fraktion i.d. Hbg. Bürgers" # parliamentary group; 5131 obs
)

# Keep every row whose advertiser is not on the exclusion list. Rows with a
# missing advertiser name are dropped as well, exactly as the chained
# `Advertiser_Name != "..."` filters did.
data.clean <- subset(
  data.DE,
  !is.na(Advertiser_Name) & !(Advertiser_Name %in% excluded_advertisers)
)
```

```{r}
#### data.clean is maintained as milestone (5131 obs)
# Only open the GUI data viewer in an interactive session so that rendering
# the document non-interactively does not fail on it.
if (interactive()) View(data.clean)
# NOTE(review): with the default row.names = TRUE, write.csv() also writes the
# row names as an unnamed first column; reading this file back yields one
# extra column compared with data.clean.
write.csv(data.clean, "data_clean.csv")
```

```{r}
######
#### Second access option: start from the exported milestone file instead of
#### the in-memory data.clean object (kept commented out).
#library(readr)
#df <- read.csv("data_clean.csv", sep=',')

# Analysis data set used by all following chunks.
df <- data.clean
# Only open the GUI data viewer in an interactive session.
if (interactive()) View(df)
```

```{r}
##################
# Package openxlsx version 4.2.5.2 documented via https://github.com/ycphs/openxlsx 
library(openxlsx)
# Package dplyr version 1.1.4 documented via https://dplyr.tidyverse.org/
library(dplyr)
```

```{r}
###########
#### Stepwise pooling of advertiser names for each party
# Each vector lists the advertiser names that are recoded to one party.
#
# Encoding note: three names were listed only in a mis-encoded (mojibake)
# form ("Ã¼" instead of "ü"). Those spellings are kept because they
# presumably match the raw export - TODO confirm against the data. The
# correctly encoded spellings are added next to them so the advertisers are
# matched in either case.

afd <- c(
  "AfD Landesverband Berlin",
  "Alternative für Deutschland",
  "Alternative für Deutschland (AfD)",
  "AfD Baden-Württemberg"
)

b90 <- c(
  "BÜNDNIS 90 / DIE GRÜNEN Charlottenburg Wilmersdorf",
  "Bündnis 90/ Die Grünen Mecklenburg-Vorpommern",
  "Bündnis 90/Die Grünen Berlin",
  "BÜNDNIS 90/DIE GRÜNEN Kreisverband Aachen",
  "BÜNDNIS 90/DIE GRÜNEN Kreisverband Schweinfurt",
  "Bündnis 90/Die Grünen KV Stuttgart",
  "Bündnis 90/Die Grünen Ortsverband Gütersloh Bündnis 90/Die Grünen Gütersloh",
  "BÜNDNIS 90/DIE GRÜNEN Ortsverband Willich",
  "Bündnis 90/Die Grünen/GAL Münster",
  "Bündnis90/ Die Grünen",
  "Bündnis90/Die Grünen Kreisverband Bodensee"
)

buc <- "Bündnis C - Christen für Deutschland"

cdu <- c(
  "CDU Christlich Demokratische Union",
  "CDU Landesverband Mecklenburg/Vorpommern",
  "CDU Deutschlands",
  "Christlich Demokratische Union (CDU) im Wahlkreis 208 Neustadt - Speyer",
  "CDU-Kreisverband Pinneberg",
  "CDU Landesverband Berlin",
  "CDU Kreisverband Kassel-Land",
  "CDU Kreisverband Cuxhaven",
  "CDU Stadtverband Schwalmstadt",
  "CDU-Landesverband Baden-WÃ¼rttemberg", # mojibake spelling, kept as is
  "CDU-Landesverband Baden-Württemberg", # correctly encoded variant
  "CDU Kreisverband Görlitz",
  "Christlich Demokratische Union (CDU) Landesverband ThÃ¼ringen", # mojibake spelling, kept as is
  "Christlich Demokratische Union (CDU) Landesverband Thüringen", # correctly encoded variant
  "CDU Landesverband Rheinland-Pfalz",
  "CDU Kreisverband Main-Tauber",
  "CDU Kreisverband Bremen-Nord",
  "CDU Kreisverband Mainz",
  "CDU Schwalm-Eder"
)

csu <- c(
  "Christlich-Soziale Union Bayern eingetragener Verein",
  "CSU"
)

fdp <- c(
  "Freie Demokratische Partei (FDP), Landesverband Bayern eingetragener Verein",
  "FDP Kreisverband Ravensburg",
  "Freie Demokratische Partei",
  "FDP Freiburg",
  "Freie Demokratische Partei (FDP)",
  "FDP Landesverband Saarland",
  "FDP Kreisverband Bielefeld",
  "Junge Liberale e.V.",
  "FDP Kreisverband Bochum",
  "FDP Kreisverband Speyer",
  "FDP Oberallgäu",
  "FDP Heidelberg",
  "FDP Kreisverband Heidekreis",
  "FDP Bayern Kreisverband MÃ¼nchen-Land Ortsverband (OV) GrÃ¼nwald", # mojibake spelling, kept as is
  "FDP Bayern Kreisverband München-Land Ortsverband (OV) Grünwald", # correctly encoded variant
  "FDP Nürnberg",
  "FDP Kreisverband Regensburg-Stadt"
)

fw <- c(
  "FREIE WÄHLER Niedersachsen",
  "Landesvereinigung FREIE WÄHLER Bayern e.V."
)

humanisten <- "Partei der Humanisten"

linke <- c(
  "DIE LINKE. Mecklenburg-Vorpommern",
  "DIE LINKE",
  "DIE LINKE. Kreisverband Hildesheim",
  "DIE LINKE Landesverband Berlin",
  "Die Linke Hessen"
)

# The second spelling is listed as a federal-level advertiser in the
# party-level recode further down in this file, so it belongs to the ÖDP here
# too; without it those ads receive no party code.
# NOTE(review): if this spelling occurs in the data, the ÖDP count changes -
# re-check the hard-coded counts used for Figure 1.
odp <- c(
  "Ökologisch-Demokratische Partei (ÖDP)",
  "Ökologisch Demokratische Partei"
)

piraten <- c(
  "Piratenpartei Deutschland",
  "Piratenpartei Deutschland Landesverband Hessen"
)

sgp <- "Sozialistische Gleichheitspartei (SGP)"

spd <- c(
  "Sozialdemokratische Partei Deutschlands (SPD) SPD Ortsverein Neu Wulmstorf",
  "Sozialdemokratische Partei Deutschlands SPD",
  "Sozialdemokratische Partei Deutschlands Unterbezirk Bonn",
  "Sozialdemokratische Partei Deutschlands, Landesverband Berlin",
  "SPD-Kreisverband Rastatt/Baden-Baden",
  "SPD-Ortsverein Uelzen",
  "SPD-UB Landkreis Harburg",
  "SPD-Unterbezirk Aachen-Stadt",
  "SPD-Unterbezirk Coesfeld",
  "SPD-Unterbezirk Main-Taunus",
  "SPD Baden-Württemberg",
  "SPD Eimsbüttel",
  "SPD Kreisverband Gütersloh",
  "SPD Kreisverband Warendorf",
  "SPD Landesorganisation Hamburg",
  "SPD Mecklenburg Vorpommern",
  "SPD Ortsverein Augustfehn",
  "SPD Ortsverein Löningen",
  "SPD Ortsverein Uchte",
  "SPD Osnabrück",
  "SPD Rhein-Neckar",
  "SPD Schwalm-Eder"
)

tierschutz <- "PARTEI MENSCH UMWELT TIERSCHUTZ (Tierschutzpartei)"
volt <- "Volt Deutschland"
```

```{r}
# Recode the advertiser name into a numeric party code (1-15).
# The position of a party vector in this list is its code; a name that is in
# none of the vectors gets NA. If a name were in several vectors, the first
# one in list order wins.
party_sets <- list(
  spd, cdu, csu, b90, afd, fdp, linke, fw, odp, tierschutz,
  piraten, humanisten, buc, volt, sgp
)
party_code_by_name <- rep(
  as.numeric(seq_along(party_sets)),
  lengths(party_sets)
)

df <- df |>
  mutate(
    advertiser = party_code_by_name[
      match(Advertiser_Name, unlist(party_sets))
    ]
  )

table(df$advertiser)

```
# Parties

| Code | Party                                |
|------|--------------------------------------|
| 1    | SPD                                  |
| 2    | CDU                                  |
| 3    | CSU                                  |
| 4    | Die Grünen                           |
| 5    | AfD                                  |
| 6    | FDP                                  |
| 7    | Die Linke                            |
| 8    | Freie Wähler                         |
| 9    | ÖDP                                  |
| 10   | Tierschutzpartei                     |
| 11   | Piraten                              |
| 12   | Humanisten                           |
| 13   | Bündnis C - Christen für Deutschland |
| 14   | Volt                                 |
| 15   | Sozialistische Gleichheitspartei     |

```{r}
############
# Recode ad type: the numeric code is the position of each value among the
# sorted distinct values of Ad_Type.
library(dplyr)
ad_type_factor <- factor(df$Ad_Type)
df <- mutate(df, Ad_Type_num = as.numeric(ad_type_factor))

table(df$Ad_Type_num)
# dplyr is detached again here; later chunks re-attach it when needed.
detach("package:dplyr", unload = TRUE)
```
# Ad Type

| Code | Meaning |
|------|---------|
| 1    | Image   |
| 2    | Text    |
| 3    | Video   |


# Figure 1 in the text

```{r}
# Package ggplot2; version: 3.5.1; full documentation: https://ggplot2.tidyverse.org/
library(ggplot2)

# Number of ads per party, entered by hand in party-code order.
party_names <- c(
  "SPD", "CDU", "CSU", "Greens", "AfD", "FDP", "Left", "FW", "ÖDP",
  "Tierschutz", "Pirates", "Humanists", "BUC", "Volt", "SGP"
)
ad_counts <- c(374, 301, 35, 185, 2494, 1257, 76, 17, 38, 30, 9, 3, 8, 299, 3)

# Store the party as a factor whose levels follow the order above, so the
# bars are not re-sorted alphabetically.
df.plot <- data.frame(
  category = factor(party_names, levels = party_names),
  values = ad_counts
)

# Bar plot of the ad counts
ggplot(df.plot, aes(x = category, y = values)) +
  geom_col(fill = "darkslategray") +
  labs(
    title = "Parties advertising via Google services",
    x = "Party",
    y = "N of Ads"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

detach("package:ggplot2", unload = TRUE)
```
# Table 1 in the text

```{r}
library(dplyr)

# Reduced party coding: only the eight parties below keep their code and
# name; every other advertiser gets code NA and the label "Other".
major_sets <- list(spd, cdu, csu, b90, afd, fdp, linke, volt)
major_codes <- c(1, 2, 3, 4, 5, 6, 7, 14)
major_names <- c("SPD", "CDU", "CSU", "Greens", "AfD", "FDP", "Left", "Volt")

# Index (1-8) of the first party vector containing each advertiser name,
# NA when the name is in none of them.
major_idx <- rep(seq_along(major_sets), lengths(major_sets))[
  match(df$Advertiser_Name, unlist(major_sets))
]

df <- df |>
  mutate(
    advertiser_reduced = major_codes[major_idx],
    advertiser_label = factor(
      ifelse(is.na(major_idx), "Other", major_names[major_idx]),
      levels = c(major_names, "Other")
    )
  )

table(df$advertiser_label)

# Cross-tabulation: ad type (rows) by party label (columns)
table01 <- table(df$Ad_Type_num, df$advertiser_label)

print(table01)

```
# Parties

| Code | Party                                |
|------|--------------------------------------|
| 1    | SPD                                  |
| 2    | CDU                                  |
| 3    | CSU                                  |
| 4    | Die Grünen                           |
| 5    | AfD                                  |
| 6    | FDP                                  |
| 7    | Die Linke                            |
| 14   | Volt                                 |


# Figure 2 in the text

```{r}
table(df$advertiser)
library(ggplot2)
# Package reshape2; version: 1.4.4, full documentation: https://github.com/hadley/reshape
library(reshape2)

# Ad counts by media type (rows) and party (columns), entered by hand.
# Each column sums to the n given in its column name.
matrix1 <- matrix(
  c(
    108, 42, 18, 83, 2191, 493, 21, 122,
    105, 48, 5, 81, 4, 433, 19, 31,
    161, 211, 12, 21, 299, 331, 36, 146
  ),
  ncol = 8, byrow = TRUE
)
rownames(matrix1) <- c("Image", "Text", "Video")
colnames(matrix1) <- c(
  "SPD (n=374)", "CDU (n=301)", "CSU (n=35)", "Greens (n=185)",
  "AfD (n=2494)", "FDP (n=1257)", "Left (n=76)", "Volt (n=299)"
)

# Share of each media type within a party (every column sums to 1)
prop_data <- prop.table(matrix1, margin = 2)

print(prop_data)

# Long-format data frame for ggplot2: one row per media type and party
df.plot01 <- as.data.frame(prop_data)
df.plot01$category <- rownames(df.plot01)
df_melted <- melt(df.plot01, id.vars = "category")

colors <- c("Image" = "darkslategray", "Text" = "darksalmon", "Video" = "deepskyblue4")

# Grouped bar plot. The shares are proportions between 0 and 1, so they are
# multiplied by 100 to match the "%" axis label.
ggplot(df_melted, aes(x = variable, y = value * 100, fill = category)) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(
    title = "Share of Media Types by Party",
    x = "Party",
    y = "%",
    fill = "Media Type"
  ) +
  scale_fill_manual(values = colors) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
```
# Party Level or unit as advertiser
```{r}
##########################
#### Stepwise pooling of advertiser names by party level
# Recode party level: 1 = federal, 2 = state, 3 = local, 4 = youth
# organisation, NA = advertiser not listed in any vector below.
#
# Three advertisers that are assigned to a party in the party recode (two CDU
# state associations and one FDP local branch) were missing from every level
# vector and therefore fell through to NA. They are added below, both in the
# mis-encoded spelling used in the party vectors and in the correctly encoded
# one.
# NOTE(review): if these advertisers occur in the data, the level shares
# change slightly - re-check the hard-coded percentages in the level figure.
fed_party <- c(
  "CDU Christlich Demokratische Union",
  "Volt Deutschland",
  "CDU Deutschlands",
  "Freie Demokratische Partei",
  "Alternative für Deutschland",
  "Freie Demokratische Partei (FDP)",
  "Sozialdemokratische Partei Deutschlands SPD",
  "DIE LINKE",
  "Alternative für Deutschland (AfD)",
  "Ökologisch-Demokratische Partei (ÖDP)",
  "Bündnis90/ Die Grünen",
  "Sozialistische Gleichheitspartei (SGP)",
  "PARTEI MENSCH UMWELT TIERSCHUTZ (Tierschutzpartei)",
  "Piratenpartei Deutschland",
  "Ökologisch Demokratische Partei",
  "Bündnis C - Christen für Deutschland",
  "Partei der Humanisten"
)

state_party <- c(
  "AfD Landesverband Berlin",
  "AfD Baden-Württemberg",
  "Bündnis 90/ Die Grünen Mecklenburg-Vorpommern",
  "Bündnis 90/Die Grünen Berlin",
  "CDU Landesverband Mecklenburg/Vorpommern",
  "CDU Landesverband Berlin",
  "CDU Landesverband Rheinland-Pfalz",
  "CDU-Landesverband Baden-WÃ¼rttemberg", # added (mojibake spelling)
  "CDU-Landesverband Baden-Württemberg", # added (correct encoding)
  "Christlich Demokratische Union (CDU) Landesverband ThÃ¼ringen", # added (mojibake spelling)
  "Christlich Demokratische Union (CDU) Landesverband Thüringen", # added (correct encoding)
  "Christlich-Soziale Union Bayern eingetragener Verein",
  "CSU",
  "Freie Demokratische Partei (FDP), Landesverband Bayern eingetragener Verein",
  "FDP Landesverband Saarland",
  "FREIE WÄHLER Niedersachsen",
  "Landesvereinigung FREIE WÄHLER Bayern e.V.",
  "DIE LINKE Landesverband Berlin",
  "Die Linke Hessen",
  "DIE LINKE. Mecklenburg-Vorpommern",
  "Piratenpartei Deutschland Landesverband Hessen",
  "SPD Baden-Württemberg",
  "SPD Landesorganisation Hamburg",
  "SPD Mecklenburg Vorpommern",
  "Sozialdemokratische Partei Deutschlands, Landesverband Berlin"
)

local_party <- c(
  "BÜNDNIS 90 / DIE GRÜNEN Charlottenburg Wilmersdorf",
  "BÜNDNIS 90/DIE GRÜNEN Kreisverband Aachen",
  "BÜNDNIS 90/DIE GRÜNEN Kreisverband Schweinfurt",
  "Bündnis 90/Die Grünen KV Stuttgart",
  "Bündnis 90/Die Grünen Ortsverband Gütersloh Bündnis 90/Die Grünen Gütersloh",
  "BÜNDNIS 90/DIE GRÜNEN Ortsverband Willich",
  "Bündnis 90/Die Grünen/GAL Münster",
  "Bündnis90/Die Grünen Kreisverband Bodensee",
  "Christlich Demokratische Union (CDU) im Wahlkreis 208 Neustadt - Speyer",
  "CDU-Kreisverband Pinneberg",
  "CDU Kreisverband Kassel-Land",
  "CDU Kreisverband Cuxhaven",
  "CDU Stadtverband Schwalmstadt",
  "CDU Kreisverband Görlitz",
  "CDU Kreisverband Main-Tauber",
  "CDU Kreisverband Bremen-Nord",
  "CDU Kreisverband Mainz",
  "CDU Schwalm-Eder",
  "FDP Kreisverband Ravensburg",
  "FDP Freiburg",
  "FDP Kreisverband Bielefeld",
  "FDP Kreisverband Bochum",
  "FDP Kreisverband Speyer",
  "FDP Oberallgäu",
  "FDP Heidelberg",
  "FDP Kreisverband Heidekreis",
  "FDP Bayern Kreisverband MÃ¼nchen-Land Ortsverband (OV) GrÃ¼nwald", # added (mojibake spelling)
  "FDP Bayern Kreisverband München-Land Ortsverband (OV) Grünwald", # added (correct encoding)
  "FDP Nürnberg",
  "FDP Kreisverband Regensburg-Stadt",
  "DIE LINKE. Kreisverband Hildesheim",
  "Sozialdemokratische Partei Deutschlands (SPD) SPD Ortsverein Neu Wulmstorf",
  "Sozialdemokratische Partei Deutschlands Unterbezirk Bonn",
  "SPD-Kreisverband Rastatt/Baden-Baden",
  "SPD-Ortsverein Uelzen",
  "SPD-UB Landkreis Harburg",
  "SPD-Unterbezirk Aachen-Stadt",
  "SPD-Unterbezirk Coesfeld",
  "SPD-Unterbezirk Main-Taunus",
  "SPD Eimsbüttel",
  "SPD Kreisverband Gütersloh",
  "SPD Kreisverband Warendorf",
  "SPD Ortsverein Augustfehn",
  "SPD Ortsverein Löningen",
  "SPD Ortsverein Uchte",
  "SPD Osnabrück",
  "SPD Rhein-Neckar",
  "SPD Schwalm-Eder"
)

youth <- c(
  "Junge Liberale e.V."
)
library(dplyr)
#### Creation of the new variable
df <- df |>
  mutate(party_level = case_when(
    Advertiser_Name %in% fed_party ~ 1,
    Advertiser_Name %in% state_party ~ 2,
    Advertiser_Name %in% local_party ~ 3,
    Advertiser_Name %in% youth ~ 4,
    TRUE ~ NA
  ))

# Absolute and relative frequencies per level (NA rows are not counted)
table(df$party_level)
prop.table(table(df$party_level))

```
# Party Level

| Code | Level                                |
|------|--------------------------------------|
| 1    | Federal level                        |
| 2    | State level                          |
| 3    | Local Level                          |
| 4    | Youth Organisation                   |


```{r}
library(ggplot2)

# Share of ads (in percent) per advertising level or unit, entered by hand.
level_names <- c("Federal Party", "State Party", "Local Party", "Youth Organisation")
level_shares <- c(84.8, 4.8, 10.1, 0.2)
df.plot1 <- data.frame(category = level_names, values = level_shares)

# Bar plot of the shares
ggplot(df.plot1, aes(x = category, y = values)) +
  geom_col() +
  labs(title = "Advertising Level or Unit", x = "Level or Unit", y = "%") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

detach("package:ggplot2", unload = TRUE)
```
# Gender Targeting
```{r}
##########################
# Recode gender targeting: the numeric code is the position of each value
# among the sorted distinct values of Gender_Targeting.
df$target_gender <- as.numeric(factor(df$Gender_Targeting))

table(df[["target_gender"]])
# Cross-check of the numeric codes against the original text values
table(df[["target_gender"]], df[["Gender_Targeting"]])
# 1 = "Male, Female, Unknown gender", 2 = "Not targeted"
# "Male, Female, Unknown gender" includes all genders. Therefore, this is not targeted.
```
# Gender Codes

| Code | Targeting                            |
|------|--------------------------------------|
| 1    | Male, Female, Unknown gender         |
| 2    | Not targeted                         |

# Age targeting
```{r}
# Age targeting: frequency of each targeting specification
table(df[["Age_Targeting"]])
# Explanation of findings: 4953 ads were not focused on a certain group,
# another 157 were explicitly not targeted; 5 focused on the younger
# demographic (18-24, 25-34, unknown age) and 16 targeted the age groups 35
# and older (+ unknown age).

# Age targeting by party label
table(df[["Age_Targeting"]], df[["advertiser_label"]])

# The FDP was the only party targeting different age groups.
```
# Findings on geotargeting (included areas)
```{r}
########################
##### Pooling the different target areas was conducted in a series of steps.
### First step: generate a new variable from the geo-targeting (included
### areas). The numeric code is the position of each value among the sorted
### distinct values of Geo_Targeting_Included.
df$target_geo <- as.numeric(factor(df$Geo_Targeting_Included))

table(df[["target_geo"]])

#write.csv(df, "df_geo.csv")
```

```{r}
##### Second step: the original information mixed numeric and character codes
##### in the same cell, which required looking up the respective zip codes
##### (web research; ChatGPT), grouping them at the state level and adding
##### the codes that already indicate a town, district or state.
##### Other codes: 161 for Germany.
# Each list entry maps one target-area code (the entry name) to the
# target_geo codes pooled into it. Entries are applied in list order.
# NOTE(review): the target_geo codes are factor positions, so this mapping
# presumably only holds for the exact data set it was built on - confirm
# before re-using it on another export.
geo_codes_by_area <- list(
  "1001" = c(115, 79, 80, 81, 84, 85, 114, 137, 150, 151, 152, 153, 159, 160, 189, 190), # Baden-Württemberg
  "1002" = c(119, 82, 83, 86, 99, 100, 101, 117, 120, 129, 178, 184, 188, 192, 193, 194, 195, 201, 202), # Bayern
  "1003" = c(122, 5, 6, 7, 8, 9, 10, 11, 12, 13, 123, 146), # Berlin
  "1004" = c(135, 25), # Brandenburg
  "1005" = c(138, 142, 143), # Bremen
  "1006" = c(171, 37, 38, 39, 40, 41, 42, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 154, 172), # Hamburg
  "1007" = c(174, 69, 77, 106, 107, 112, 158, 180, 181), # Hessen
  "1008" = c(183, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 197), # Mecklenburg-Vorpommern
  "1009" = c(43, 60, 61, 62, 63, 64, 65, 70, 74, 97, 98, 113, 144, 145, 147, 167, 173, 175, 186, 200, 203, 204, 205), # Niedersachsen
  "1010" = c(66, 67, 68, 71, 72, 73, 75, 76, 88, 89, 90, 91, 111, 130, 131, 132, 134, 157, 168, 169, 170, 185, 206), # Nordrhein-Westfalen
  "1011" = c(92, 93, 94, 96, 102, 103, 104, 105, 108, 109, 110, 121, 133, 148, 177, 179, 196), # Rheinland-Pfalz
  "1012" = c(198, 78, 182), # Saarland
  "1013" = c(199, 1, 2, 3, 95, 128, 149, 166), # Sachsen
  "1014" = numeric(0), # Sachsen-Anhalt (no geo codes assigned)
  "1015" = c(59, 118, 155), # Schleswig-Holstein
  "1016" = c(156, 165), # Thüringen
  "1000" = c(161, 87, 125, 126, 127, 136), # Germany
  "999" = c(4, 124, 164), # Germany_East
  "998" = c(163, 176), # Germany-West
  "997" = c(116), # Germany-East_West_mix
  "900" = c(14, 15, 16, 17, 18, 19, 20, 22, 21, 23, 24, 139, 140, 141, 162, 191), # unspecific
  "9999" = c(187) # not targeted
)

for (area_code in names(geo_codes_by_area)) {
  df$target_BL[df$target_geo %in% geo_codes_by_area[[area_code]]] <-
    as.numeric(area_code)
}

table(df$target_BL)

```
# Geo_Targeting_Inclusive

| Code | Target area                          |
|------|--------------------------------------|
| 900  | unspecific                           |
| 997  | Germany-East_West_mix                |
| 998  | Germany-West_mix                     |
| 999  | Germany-East_mix                     |
| 1000 | Germany                              |
| 1001 | Baden-Württemberg                    |
| 1002 | Bayern                               |
| 1003 | Berlin                               |
| 1004 | Brandenburg                          |
| 1005 | Bremen                               |
| 1006 | Hamburg                              |
| 1007 | Hessen                               |
| 1008 | Mecklenburg-Vorpommern               |
| 1009 | Niedersachsen                        |
| 1010 | Nordrhein-Westfalen                  |
| 1011 | Rheinland-Pfalz                      |
| 1012 | Saarland                             |
| 1013 | Sachsen                              |
| 1014 | Sachsen-Anhalt                       |
| 1015 | Schleswig-Holstein                   |
| 1016 | Thüringen                            |
| 9999 | not targeted                         |

```{r}
#############
#### Package labelled, version: 2.13.0; full documentation: https://larmarange.github.io/labelled/
library(labelled)
#### Package expss, version: 0.11.6; full documentation: https://gdemin.github.io/expss/
library(expss)

# Value labels for the pooled target-area codes, one "code label" entry per
# line. Fixes the label of code 1014, which read "Sachen-Anhalt".
target_area_labels <- c(
  "900 unspezifisch",
  "997 West_East_mix",
  "998 West_mix",
  "999 East_mix",
  "1000 Deutschland",
  "1001 Baden-Württemberg",
  "1002 Bayern",
  "1003 Berlin",
  "1004 Brandenburg",
  "1005 Bremen",
  "1006 Hamburg",
  "1007 Hessen",
  "1008 Mecklenburg-Vorpommern",
  "1009 Niedersachsen",
  "1010 Nordrhein-Westfalen",
  "1011 Rheinland-Pfalz",
  "1012 Saarland",
  "1013 Sachsen",
  "1014 Sachsen-Anhalt",
  "1015 Schleswig-Holstein",
  "1016 Thüringen",
  "9999 not_targeted"
)

# make_labels() receives the entries as one newline-separated string, as in
# the original multi-line literal.
val_lab(df$target_BL) <- make_labels(paste(target_area_labels, collapse = "\n"))

table(df$target_BL)
```

```{r}
# Which party targeted which state?
table(df[["target_BL"]], df[["advertiser_label"]])

# Explanation of major findings: CSU only runs in Bavaria; VOLT and FDP
# targeted cities (unspecific for states); AfD competed for certain districts
# in Hamburg; Left defended two out of four direct mandates in Berlin (Gysi
# and Lötzsch) and one in Leipzig, Sachsen (Pellmann).
```
# Findings on geotargeting (excluded areas)
```{r}
## Recode Geo_Targeting_Excluded into grouped numeric area codes (target_BL_ex)
# dplyr supplies mutate(); it is attached before first use so the chunk also
# runs in a fresh session (it used to be attached only after the mutate() call).
library(dplyr)

# Number every distinct Geo_Targeting_Excluded string by its factor level index.
# NOTE(review): the index depends on the sorted set of distinct strings in the
# data, so the hard-coded codes below are only valid for this exact export;
# verify against the table() output before reusing them on other data.
df <- df |>
  mutate(target_geo_ex = as.numeric(factor(Geo_Targeting_Excluded)))

table(df$target_geo_ex)

# Start from an all-NA column so that re-running the chunk cannot keep stale
# codes from a previous run.
df$target_BL_ex <- NA_real_
df$target_BL_ex[df$target_geo_ex %in% c(20)] <- 100 # not targeted
df$target_BL_ex[df$target_geo_ex %in% c(1, 3, 19)] <- 101 # unspecific state
df$target_BL_ex[df$target_geo_ex %in% c(13, 14, 15, 16)] <- 102 # unspecific city
df$target_BL_ex[df$target_geo_ex %in% c(2)] <- 103 # Berlin Prenzl, Mitte, Friedrichshain, Kreuzberg
df$target_BL_ex[df$target_geo_ex %in% c(4)] <- 104 # Stolzenau Niedersachsen
df$target_BL_ex[df$target_geo_ex %in% c(5)] <- 105 # Petershagen (NRW), Stolzenau (NieS)
df$target_BL_ex[df$target_geo_ex %in% c(6, 8)] <- 106 # Austria, Switzerland
df$target_BL_ex[df$target_geo_ex %in% c(7, 9)] <- 107 # Austria, Italy, Baden-Württemberg, Argenbrühl
df$target_BL_ex[df$target_geo_ex %in% c(10)] <- 108 # Austria, Switzerland, Baden-Württemberg, Hessen, Sachsen, Thüringen
df$target_BL_ex[df$target_geo_ex %in% c(11)] <- 109 # Baden-Württemberg
df$target_BL_ex[df$target_geo_ex %in% c(12)] <- 110 # Bayern
df$target_BL_ex[df$target_geo_ex %in% c(17)] <- 111 # Konstanz
df$target_BL_ex[df$target_geo_ex %in% c(18)] <- 112 # Mannheim, Frankenthal, Ludwigshafen
df$target_BL_ex[df$target_geo_ex %in% c(21)] <- 113 # Saarland
df$target_BL_ex[df$target_geo_ex %in% c(22)] <- 114 # Schleswig-Holstein, Saarland

# The labelling packages are attached only for the val_lab() call and detached
# again right afterwards.
library(labelled)
library(expss)

# One "code label" pair per line, mirroring the recoding above.
val_lab(df$target_BL_ex) <- make_labels("100 not targeted
                                         101 unspezifisch Land
                                         102 unspezifisch Stadt
                                         103 Berlin Prenzl, Mitte, Friedrichshain, Kreuzberg
                                         104 Stolzenau Niedersachsen
                                         105 Petershagen (NRW), Stolzenau (NieS)
                                         106 Austria, Switzerland
                                         107 Austria, Italy, Baden-Württemberg, Argenbrühl
                                         108 Austria, Switzerland, Baden-Württemberg, Hessen, Sachsen, Thüringen
                                         109 Baden-Württemberg
                                         110 Bayern
                                         111 Konstanz
                                         112 Mannheim, Frankenthal, Ludwigshafen
                                         113 Saarland
                                         114 Schleswig-Holstein, Saarland")

detach("package:expss", unload = TRUE)
detach("package:labelled", unload = TRUE)

# Frequency of each excluded-area code
table(df$target_BL_ex)
```
# Geo_Targeting_Excluded

| Code | Excluded area                                                       |
|------|---------------------------------------------------------------------|
| 100  | not targeted                                                        |
| 101  | unspecific State                                                    |
| 102  | unspecific City                                                     |
| 103  | Berlin Prenzl, Mitte, Friedrichshain, Kreuzberg                     |
| 104  | Stolzenau Niedersachsen                                             |
| 105  | Petershagen (NRW), Stolzenau (NieS)                                 |
| 106  | Austria, Switzerland                                                |
| 107  | Austria, Italy, Baden-Württemberg, Argenbrühl                       |
| 108  | Austria, Switzerland, Baden-Württemberg, Hessen, Sachsen, Thüringen |
| 109  | Baden-Württemberg                                                   |
| 110  | Bayern                                                              |
| 111  | Konstanz                                                            |
| 112  | Mannheim, Frankenthal, Ludwigshafen                                 |
| 113  | Saarland                                                            |
| 114  | Schleswig-Holstein, Saarland                                        |

```{r}
# Which party excluded which areas?
# Cross-tabulates the excluded-area code (rows) against the party label (columns).
table(df$target_BL_ex, df$advertiser_label)
# Explanation of major findings: CDU does not compete in Bavaria; CSU: Bavaria
# borders Switzerland and Austria; Greens were not able to run in Saarland due
# to a formal mistake when submitting the list to the electoral commission;
# AfD is very weak in these Berlin districts.
```

# Figure 3 in the text
```{r}
# Figure 3: number of newly started ads per day across the observation period.
library(ggplot2)

# Inspect the raw start dates, then parse them with the "%Y%m%d" format and
# inspect the parsed result.
table(df$Date_Range_Start)
df$Date_Range_Start1 <- as.Date(df$Date_Range_Start, format = "%Y%m%d")
table(df$Date_Range_Start1)

# Daily counts of new ads, entered by hand.
# NOTE(review): presumably transcribed from the table() output above - confirm
# after any change to the data preparation.
# 2021-07-29 and 2021-07-31 are not listed, so the line is drawn straight
# across those two days.
dfplot2 <- data.frame(
  date = as.Date(c(
    "2021-07-27", "2021-07-28", "2021-07-30", "2021-08-01", "2021-08-02",
    "2021-08-03", "2021-08-04", "2021-08-05", "2021-08-06", "2021-08-07",
    "2021-08-08", "2021-08-09", "2021-08-10", "2021-08-11", "2021-08-12",
    "2021-08-13", "2021-08-14", "2021-08-15", "2021-08-16", "2021-08-17",
    "2021-08-18", "2021-08-19", "2021-08-20", "2021-08-21", "2021-08-22",
    "2021-08-23", "2021-08-24", "2021-08-25", "2021-08-26", "2021-08-27",
    "2021-08-28", "2021-08-29", "2021-08-30", "2021-08-31", "2021-09-01",
    "2021-09-02", "2021-09-03", "2021-09-04", "2021-09-05", "2021-09-06",
    "2021-09-07", "2021-09-08", "2021-09-09", "2021-09-10", "2021-09-11",
    "2021-09-12", "2021-09-13", "2021-09-14", "2021-09-15", "2021-09-16",
    "2021-09-17", "2021-09-18", "2021-09-19", "2021-09-20", "2021-09-21",
    "2021-09-22", "2021-09-23", "2021-09-24", "2021-09-25"
  )),
  values = c(
    22, 1, 1, 25, 53, 23, 11, 8, 37, 16,
    3, 49, 6, 52, 11, 27, 12, 42, 56, 30,
    10, 303, 117, 128, 40, 120, 47, 303, 311, 31,
    14, 15, 88, 89, 914, 209, 246, 34, 27, 31,
    85, 37, 38, 278, 72, 42, 72, 49, 308, 105,
    36, 22, 35, 158, 57, 72, 22, 49, 32
  )
)

# x-axis limits: fixed lower bound, upper bound (NA) taken from the data.
# Named so that they do not shadow base::min() / base::max().
x_limits <- c(as.Date("2021-07-27"), NA)

# Line plot of new ads across the observation period
ggplot(data = dfplot2, aes(x = date, y = values)) +
  geom_line(color = "darkslategray", linewidth = 1) +
  scale_x_date(
    date_labels = "%Y-%m-%d",
    date_breaks = "2 days",
    limits = x_limits
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(
    x = "Date",
    y = "N New Ads",
    title = "New Ads per Day across observation period"
  )
```
# Figure 4 in the text
```{r}
# Figure 4: share of each party among ads, by the number of days an ad was shown.
library(dplyr)
library(ggplot2)

# Collapse advertiser names into a numeric party code and a party factor.
# spd, cdu, csu, b90, afd, fdp, linke and volt are name vectors that must
# already exist in the session when this chunk runs.
df <- df |>
  mutate(
    advertiser_reduced = case_when(
      Advertiser_Name %in% spd ~ 1,
      Advertiser_Name %in% cdu ~ 2,
      Advertiser_Name %in% csu ~ 3,
      Advertiser_Name %in% b90 ~ 4,
      Advertiser_Name %in% afd ~ 5,
      Advertiser_Name %in% fdp ~ 6,
      Advertiser_Name %in% linke ~ 7,
      Advertiser_Name %in% volt ~ 14,
      # typed NA keeps every branch numeric (a bare NA is logical)
      TRUE ~ NA_real_
    ),
    advertiser_label = factor(
      case_when(
        advertiser_reduced == 1 ~ "SPD",
        advertiser_reduced == 2 ~ "CDU",
        advertiser_reduced == 3 ~ "CSU",
        advertiser_reduced == 4 ~ "Greens",
        advertiser_reduced == 5 ~ "AfD",
        advertiser_reduced == 6 ~ "FDP",
        advertiser_reduced == 7 ~ "Left",
        advertiser_reduced == 14 ~ "Volt",
        # advertisers without a party code fall through to "Other"
        TRUE ~ "Other"
      ),
      levels = c("SPD", "CDU", "CSU", "Greens", "AfD", "FDP", "Left", "Volt", "Other")
    )
  )

table(df$advertiser_label)

# Ads per (number of days shown, party)
table_data1 <- table(df$Num_of_Days, df$advertiser_label)
print(table_data1)

# Conversion into a long data frame: one row per (Num_of_Days, Advertiser)
df_table1 <- as.data.frame(table_data1)
colnames(df_table1) <- c("Num_of_Days", "Advertiser", "Frequency")
# table() turned the day counts into factor levels; go back to numbers
df_table1$Num_of_Days <- as.numeric(as.character(df_table1$Num_of_Days))

# Party share within each number of days; ungroup afterwards so the grouping
# does not leak into later operations on df_table1.
df_table1 <- df_table1 |>
  group_by(Num_of_Days) |>
  mutate(Share = Frequency / sum(Frequency)) |>
  ungroup()

ggplot(df_table1, aes(x = Num_of_Days, y = Share, fill = Advertiser)) +
  geom_area(alpha = 0.6, linewidth = 0.5, colour = "white") +
  labs(
    x = "Number of Days",
    y = "Share",
    title = "Number of Days Ads were displayed per Party"
  ) +
  scale_x_continuous(breaks = seq(1, 55, by = 5), limits = c(1, 55)) +
  scale_fill_manual(
    values = c(
      "SPD" = "#FF0000", "CDU" = "#000000", "CSU" = "#666666",
      "Greens" = "#009900", "AfD" = "#3366FF", "FDP" = "#FFFF00",
      "Left" = "#990066", "Volt" = "#663399", "Other" = "white"
    ),
    labels = c("SPD", "CDU", "CSU", "Greens", "AfD", "FDP", "Left", "Volt", "Other")
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

```
# Figure 5 in the text
```{r}
# Figure 5: proportional share of newly started ads per party over time.
# Package tidyr: version 1.3.1, full documentation: https://tidyr.tidyverse.org/
library(tidyr)

# Keep a character copy of the start date and parse it into a Date column
df[["Date_Range_Startc"]] <- as.character(df[["Date_Range_Start"]])
df[["Date_Range_Start2"]] <- as.Date(df[["Date_Range_Startc"]], format = "%Y%m%d")

# Ads per (start date, party), printed for reference
table_data2 <- table(df[["Date_Range_Start2"]], df[["advertiser_label"]])
print(table_data2)

# Share per day for each advertiser: count ads per date and party, divide by
# the daily total, then add the date/party combinations that never occur and
# give them a share of 0.
df_summary <- df |>
  group_by(Date_Range_Start2, advertiser_label) |>
  summarise(Count = n()) |>
  mutate(Share1 = Count / sum(Count)) |>
  ungroup() |>
  complete(Date_Range_Start2, advertiser_label) |>
  mutate(Share1 = replace_na(Share1, 0))

# Fill colour per party
party_colours <- c(
  "SPD" = "#FF0000", "CDU" = "#000000", "CSU" = "#666666",
  "Greens" = "#009900", "AfD" = "#3366FF", "FDP" = "#FFFF00",
  "Left" = "#990066", "Volt" = "#663399", "Other" = "white"
)

# Proportional stacked area plot
ggplot(
  df_summary,
  aes(x = Date_Range_Start2, y = Share1, fill = advertiser_label)
) +
  geom_area(alpha = 0.6, linewidth = 0.5, colour = "white", position = "fill") +
  labs(
    x = "Date",
    y = "Share",
    title = "Proportional Share of Ads Displayed per Party Over Time"
  ) +
  scale_x_date(date_labels = "%Y-%m-%d", date_breaks = "5 days") +
  scale_fill_manual(values = party_colours, labels = names(party_colours)) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

```

```{r}
# Export the fully recoded working data set to the working directory
write.csv(x = df, file = "reaching_the_voter_complete.csv")
```
