library(dplyr)
library(ggplot2);
library(RMySQL)
db <- dbConnect(MySQL(), host='', user='', password='', dbname='')
Custom Theme (based on https://rpubs.com/Koundy/71792)
theme_Publication <- function(base_size=10, base_family="Helvetica") {
library(grid)
library(ggthemes)
(theme_foundation(base_size=base_size, base_family=base_family)
+ theme(plot.title = element_text(face = "bold",
size = rel(1), hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5),
text = element_text(),
panel.background = element_rect(colour = NA),
plot.background = element_rect(colour = NA),
panel.border = element_rect(colour = NA),
axis.title = element_text(face = "bold",size = rel(1)),
axis.title.y = element_text(angle=90,vjust =2),
axis.title.x = element_text(vjust = -0.2),
axis.text = element_text(),
axis.line = element_line(colour="black"),
axis.ticks = element_line(),
panel.grid.major = element_line(colour="#f0f0f0"),
panel.grid.minor = element_blank(),
legend.key = element_rect(colour = NA),
legend.position = "bottom",
legend.direction = "horizontal",
legend.key.size= unit(0.2, "cm"),
legend.spacing = unit(0, "cm"),
legend.title = element_text(face = "italic", size = rel(1)),
plot.margin=unit(c(1,2,1,2),"mm"),
strip.background=element_rect(colour="#f0f0f0",fill="#f0f0f0"),
strip.text = element_text(face="bold")
))
}
theme_set(theme_Publication())
scale_colour_discrete <- function(...) scale_color_brewer(..., palette="Set2")
legendInBox <- function(pos = c(0,0)) theme(legend.position = pos, legend.direction = "vertical", legend.box.background = element_rect(colour = "#000000"), legend.key.size = unit(0.9, "lines"))
update_geom_defaults("point", list(size = 0.7))
CA:VSIDS vs CA:RND
df <- dbGetQuery(db, "
SELECT * FROM Experiment as main
inner join Instance
on Instance.`ID-Instance` = main.`ID-Instance`
inner join Configuration
on Configuration.`ID-configuration` = main.`ID-configuration`
where
`RE-value` in ('lbd', 'luE3')
and `VD-value` in ('df95', 'df80')
and `PH-value` in ('std')
and `CE-value` in ('lin', 'glu', 'min')
and `CA-value` in ('vsids', 'lbd', 'rnd')
and `CL-value` in ('1uip')
and `PR-value` in ('on', 'off')
and `SH-value` in ('off');")
df$`RE-value` <- as.factor(df$`RE-value`)
df$`VD-value` <- as.factor(df$`VD-value`)
df$`PH-value` <- as.factor(df$`PH-value`)
df$`CE-value` <- as.factor(df$`CE-value`)
df$`CA-value` <- as.factor(df$`CA-value`)
df$`CL-value` <- as.factor(df$`CL-value`)
df$`PR-value` <- as.factor(df$`PR-value`)
df$`SH-value` <- as.factor(df$`SH-value`)
df <- df[, !duplicated(colnames(df))]
library(ggplot2)
data = df %>% filter(Subfamily == "domset_4", `RE-value` == "lbd", `VD-value` == "df95", `PH-value` == "std", `CE-value` == "glu", `PR-value` == "on")
plot <- ggplot(data, aes(
x = `Variables`,
y = `CPU-time`,
color = `CA-value`)) + geom_point() + geom_line() +
labs(x = "Number of variables", y = "CPU time", color = "Clause assessment", title = "Dominating set formulas (k=4)") +
legendInBox(c(0.25,0.6))
ggsave(filename = "plots/ca_rnd_vs_vsidis_single.pdf", units = "cm", device = "pdf", width = 10, height = 5)
plot

data = df %>% filter()
timeouts = data %>%
group_by(Subfamily, `RE-value`, `VD-value`, `PH-value`, `CE-value`, `CA-value`, `CL-value`, `PR-value`, `SH-value`) %>%
summarize(
timeout = sum(`Solver-answer` == "INDETERMINATE"),
outofmem = sum(`Solver-answer` == "OUTOFMEMORY" ),
unsolved = sum(`Solver-answer` %in% c("OUTOFMEMORY", "INDETERMINATE") ),
n = n())
rndVsLbd = inner_join(
timeouts %>% filter(`CA-value` == "rnd"),
timeouts %>% filter(`CA-value` == "lbd"),
by = c("Subfamily", "RE-value", "VD-value", "PH-value", "CE-value", "CL-value", "PR-value", "SH-value"),
suffix = c(".rnd", ".other"))
rndVsLbd = rndVsLbd %>% mutate(diff = unsolved.rnd - unsolved.other, other = factor("lbd", c("lbd", "vsids")))
rndVsVsids = inner_join(
timeouts %>% filter(`CA-value` == "rnd"),
timeouts %>% filter(`CA-value` == "vsids"),
by = c("Subfamily", "RE-value", "VD-value", "PH-value", "CE-value", "CL-value", "PR-value", "SH-value"),
suffix = c(".rnd", ".other"))
rndVsVsids = rndVsVsids %>% mutate(diff = unsolved.rnd - unsolved.other, other = factor("vsids", c("lbd", "vsids")))
data = union(rndVsLbd, rndVsVsids)
print("lbd - rnd")
[1] "lbd - rnd"
summary(data %>% filter(other == "lbd") %>% pull(diff))
Min. 1st Qu. Median Mean 3rd Qu. Max.
-38.00 0.00 1.00 3.12 6.00 33.00
print("vsids - rnd")
[1] "vsids - rnd"
summary(data %>% filter(other == "vsids") %>% pull(diff))
Min. 1st Qu. Median Mean 3rd Qu. Max.
-14.0000 0.0000 0.0000 0.8029 1.0000 33.0000
plot <- ggplot(data, aes(x = `other`, y = `diff`)) +
geom_boxplot(outlier.shape = 4) +
stat_boxplot(geom ='errorbar', width = 0.2) +
labs(x = "", y = "Difference in number of timeouts", title = "All formula families") +
scale_x_discrete(labels = c('random - LBD','random - activity-based')) +
theme(plot.margin=unit(c(2,2,-3,2),"mm")) +
ggsave(filename = "plots/ca_rnd_boxplot.pdf", units = "cm", device = "pdf", width = 10, height = 6.5)
plot

Each data point is a configuration for a subfamily, the difference is optained by only varying the clause assesment strategie.
The plot shows that there is a difference between vsids and rnd, especially the lower quartiel on the difference is zero, i.e. in less then 25% of the cases rnd is better (for no differnece one would expect more). Especially, when comparing this difference to lbd, it shows that it is not very relevant.
Decay Factor
Load Variable Decay Batch
df <- dbGetQuery(db, "
SELECT * FROM Experiment as main
inner join Instance
on Instance.`ID-Instance` = main.`ID-Instance`
inner join Configuration
on Configuration.`ID-configuration` = main.`ID-configuration`
where
`RE-value` in ('lbd', 'luE3')
and `VD-value` in ('df99', 'df95', 'df80', 'df65', 'rnd', 'lrb')
and `PH-value` in ('fix0', 'std')
and `CE-value` in ('glu', 'min')
and `CA-value` in ('vsids', 'lbd')
and `CL-value` in ('1uip')
and `PR-value` in ('on', 'off')
and `SH-value` in ('off');")
df$`RE-value` <- as.factor(df$`RE-value`)
df$`VD-value` <- as.factor(df$`VD-value`)
df$`PH-value` <- as.factor(df$`PH-value`)
df$`CE-value` <- as.factor(df$`CE-value`)
df$`CA-value` <- as.factor(df$`CA-value`)
df$`CL-value` <- as.factor(df$`CL-value`)
df$`PR-value` <- as.factor(df$`PR-value`)
df$`SH-value` <- as.factor(df$`SH-value`)
df <- df[, !duplicated(colnames(df))]
data = df %>% filter(Subfamily == "op_partial", `CE-value` == "glu", `RE-value` == "lbd", `PH-value` == "std", `PR-value` == "off", `CA-value` == "lbd", !`VD-value` %in% c("lrb", "rnd"))
plot <- ggplot(data,
aes(x = `Variables`,
y = `CPU-time`, color = `VD-value`)
) +
geom_point() + geom_line() +
legendInBox(c(0.75,0.53)) +
labs(x = "Number of variables", y = "CPU time", color = "VSIDS decay factor", title = "Partial ordering principle formulas") +
scale_colour_discrete(labels = c("0.65", "0.80", "0.95", "0.99"))
ggsave(filename = "plots/vd_op_partial.pdf", units = "cm", device = "pdf", width = 10, height = 5.2)
plot

LRB
data = df %>% filter()
timeouts = data %>%
group_by(Subfamily, `RE-value`, `VD-value`, `PH-value`, `CE-value`, `CA-value`, `CL-value`, `PR-value`, `SH-value`) %>%
summarize(
timeout = sum(`Solver-answer` == "INDETERMINATE"),
outofmem = sum(`Solver-answer` == "OUTOFMEMORY" ),
unsolved = sum(`Solver-answer` %in% c("OUTOFMEMORY", "INDETERMINATE") ),
n = n())
lrbVs65 = inner_join(
timeouts %>% filter(`VD-value` == "lrb"),
timeouts %>% filter(`VD-value` == "df65"),
by = c("Subfamily", "RE-value", "PH-value", "CE-value", "CL-value", "PR-value", "SH-value", "CA-value"))
lrbVs65 = lrbVs65 %>% mutate(diff = unsolved.x - unsolved.y, other = "df65")
lrbVs99 = inner_join(
timeouts %>% filter(`VD-value` == "lrb"),
timeouts %>% filter(`VD-value` == "df99"),
by = c("Subfamily", "RE-value", "PH-value", "CE-value", "CL-value", "PR-value", "SH-value", "CA-value"))
lrbVs99 = lrbVs99 %>% mutate(diff = unsolved.x - unsolved.y, other = "df99")
df99Vsdf65 = inner_join(
timeouts %>% filter(`VD-value` == "df99"),
timeouts %>% filter(`VD-value` == "df65"),
by = c("Subfamily", "RE-value", "PH-value", "CE-value", "CL-value", "PR-value", "SH-value", "CA-value"))
df99Vsdf65 = df99Vsdf65 %>% mutate(diff = unsolved.x - unsolved.y, other = "df")
data <- union(lrbVs65, lrbVs99)
data <- union(data, df99Vsdf65)
print("df65 - lrb")
[1] "df65 - lrb"
summary(data %>% filter(other == "df65") %>% pull(diff))
Min. 1st Qu. Median Mean 3rd Qu. Max.
-25.0000 -2.0000 0.0000 -0.6743 0.0000 26.0000
print("df99 - lrb")
[1] "df99 - lrb"
summary(data %>% filter(other == "df99") %>% pull(diff))
Min. 1st Qu. Median Mean 3rd Qu. Max.
-27.0000 -1.0000 0.0000 -0.8606 1.0000 11.0000
print("df99 - df65")
[1] "df99 - df65"
summary(data %>% filter(other == "df") %>% pull(diff))
Min. 1st Qu. Median Mean 3rd Qu. Max.
-27.0000 -3.0000 0.0000 0.1863 1.0000 31.0000
plot <- ggplot(data, aes(x = `other`, y = `diff`)) +
geom_boxplot(outlier.shape = 4) +
stat_boxplot(geom ='errorbar', width = 0.2) +
labs(x = "", y = "Difference in number of timeouts", title = "All formula families") +
theme(plot.margin=unit(c(2,2,-3,2),"mm")) +
scale_x_discrete(labels = c('VSIDS .99 - .65', 'LRB - VSIDS .65','LRB - VSIDS .99'))
ggsave(filename = "plots/vd_lrb_vs_vsids.pdf", units = "cm", device = "pdf", width = 10, height = 6.5)
plot

Restarts
Load main Batch
library(RMySQL)
df <- dbGetQuery(db, "
SELECT * FROM Experiment as main
inner join Instance
on Instance.`ID-Instance` = main.`ID-Instance`
inner join Configuration
on Configuration.`ID-configuration` = main.`ID-configuration`
where
`RE-value` in ('no', 'lbd', 'luE2', 'luE3')
and `VD-value` in ('df95', 'df80', 'lrb')
and `PH-value` in ('fix0', 'std')
and `CE-value` in ('no', 'lin', 'glu', 'min')
and `CA-value` in ('vsids', 'lbd', 'none')
and `CL-value` in ('1uip')
and `PR-value` in ('on', 'off')
and `SH-value` in ('off');")
df$`RE-value` <- as.factor(df$`RE-value`)
df$`VD-value` <- as.factor(df$`VD-value`)
df$`PH-value` <- as.factor(df$`PH-value`)
df$`CE-value` <- as.factor(df$`CE-value`)
df$`CA-value` <- as.factor(df$`CA-value`)
df$`CL-value` <- as.factor(df$`CL-value`)
df$`PR-value` <- as.factor(df$`PR-value`)
df$`SH-value` <- as.factor(df$`SH-value`)
df <- df[, !duplicated(colnames(df))]
data = df %>% filter(Subfamily == "stone_width3chain_nhalfmarkers", `CE-value` == "glu", `PH-value` == "std", `PR-value` == "off", `CA-value` == "lbd", `VD-value` == "df95")
plot <- ggplot(data,
aes(x = `Variables`,
y = `CPU-time`, color = `RE-value`)
) +
geom_point() + geom_line() +
theme(legend.position = c(1.2,0.5), legend.direction = "vertical", legend.key.size = unit(1, "lines"), plot.margin=unit(c(1,25,1,2),"mm")) +
scale_colour_discrete(labels = c("LBD", "Luby 100", "Luby 1000", "no restarts")) +
labs(x = "Number of variables", y = "CPU time", color = "Restarts", title = "Stone formulas", subtitle = "on width 3 chain, #stones = #nodes / 2")
ggsave(filename = "plots/restarts.pdf", units = "cm", device = "pdf", width = 10, height = 5.2)
plot + theme(legend.position = "bottom", legend.direction = "horizontal")

data = df %>% filter(Subfamily == "peb_pyrofpyr_neq3", `CE-value` == "glu", `PH-value` == "std", `PR-value` == "off", `CA-value` == "lbd", `VD-value` == "df80")
plot <- ggplot(data,
aes(x = `Variables`,
y = `CPU-time`, color = `RE-value`)
) +
geom_point() + geom_line() +
scale_colour_discrete(labels = c("LBD", "Luby 100", "Luby 1000", "no restarts")) +
theme(legend.position = c(1.2,0.5), legend.direction = "vertical", legend.key.size = unit(1, "lines"), plot.margin=unit(c(1,25,1,2),"mm")) +
labs(x = "Number of variables", y = "CPU time", color = "Restarts", title = "Pebbling on pyramid-of-pyramids graphs")
ggsave(filename = "plots/restarts2.pdf", units = "cm", device = "pdf", width = 10, height = 5)
plot + theme(legend.position = "bottom", legend.direction = "horizontal")

Database Size
data = df %>% filter(Subfamily == "tseitin_reggrid_5", `CA-value` == "lbd", `RE-value` == "lbd", `VD-value` == "df80", `PH-value` == "std", `PR-value` == "off")
ggplot(data,
aes(x = `Variables`,
y = `CPU-time`, color = `CE-value`, group = `ID-configuration`)
) +
geom_point() + geom_line() +
labs(x = "Number of variables", y = "CPU time", color = "Clause erasure:")

ggplot(data %>% filter(`Solver-answer` == "UNSATISFIABLE"),
aes(x = `Variables`,
y = `Conflicts` / 1000000, color = `CE-value`, group = `ID-configuration`)
) +
geom_point() + geom_line() +
labs(x = "Number of variables", y = "Million conflicts")

Fancy Arrangement
library(gridExtra)
library(cowplot)
data = df %>% filter(Subfamily == "tseitin_reggrid_5", `CA-value` == "lbd", `RE-value` == "lbd", `VD-value` == "df80", `PH-value` == "std", `PR-value` == "off")
plot1 <- ggplot(data,
aes(x = `Variables`,
y = `CPU-time`, color = `CE-value`, group = `ID-configuration`)
) +
geom_point() + geom_line() +
theme(plot.margin=unit(c(0,2,0,2),"mm"), legend.position = "bottom") +
scale_colour_discrete(labels = c("glucose", "linear", "minisat")) +
labs(x = "Number of variables", y = "CPU time", color = "Clause erasure:")
legend <- get_legend(plot1)
plot1 <- plot1 + theme(legend.position = "none")
plot2 <- ggplot(data %>% filter(`Solver-answer` == "UNSATISFIABLE"),
aes(x = `Variables`,
y = `Conflicts` / 1000000, color = `CE-value`, group = `ID-configuration`)
) +
geom_point() + geom_line() +
theme(plot.margin=unit(c(0,2,0,3),"mm"), legend.position = "none")+
labs(x = "Number of variables", y = "Million conflicts")
titlePlot <- ggplot() + labs(title = "Tseitin formulas on grid graphs (5 rows)") +
theme(plot.margin=unit(c(2,2,0,2),"mm"))
arrange <- arrangeGrob(plot1,plot2, ncol = 2)
arrange <- arrangeGrob(titlePlot, arrange, legend, heights = c(0.8,5,0.8) ,nrow = 3)
ggsave(filename = "plots/clauseErasure.pdf", arrange, units = "cm", device = "pdf", width = 10, height = 6)
Phase Saving
Load Phase Saving Batch
df <- dbGetQuery(db, "
SELECT * FROM Experiment as main
inner join Instance
on Instance.`ID-Instance` = main.`ID-Instance`
inner join Configuration
on Configuration.`ID-configuration` = main.`ID-configuration`
where
`Subfamily` = 'stone_width3chain_nhalfmarkers'
and `RE-value` in ('lbd', 'luE3')
and `VD-value` in ('df95', 'df80')
and `PH-value` in ('fix0', 'std', 'dynrnd', 'fixrnd', 'ctr')
and `CE-value` in ('glu', 'min')
and `CA-value` in ('vsids', 'lbd')
and `CL-value` in ('1uip')
and `PR-value` in ('on', 'off')
and `SH-value` in ('off');")
df$`RE-value` <- as.factor(df$`RE-value`)
df$`VD-value` <- as.factor(df$`VD-value`)
df$`PH-value` <- as.factor(df$`PH-value`)
df$`CE-value` <- as.factor(df$`CE-value`)
df$`CA-value` <- as.factor(df$`CA-value`)
df$`CL-value` <- as.factor(df$`CL-value`)
df$`PR-value` <- as.factor(df$`PR-value`)
df$`SH-value` <- as.factor(df$`SH-value`)
df <- df[, !duplicated(colnames(df))]
data = df %>% filter(Subfamily == "stone_width3chain_nhalfmarkers", `CE-value` == "glu", `PR-value` == "off", `CA-value` == "lbd", `VD-value` == "df95", `RE-value` == "lbd")
plot <- ggplot(data,
aes(x = `Variables`,
y = `CPU-time`, color = `PH-value`)
) +
geom_point() + geom_line() +
theme(legend.position = c(1.2,0.5), legend.direction = "vertical", legend.key.size = unit(1.2, "lines"), plot.margin=unit(c(1,25,1,2),"mm")) +
scale_colour_discrete(labels = c("counter", "dynamic\nrandom", "fix zero", "fix random", "standard")) +
labs(x = "Number of variables", y = "CPU time", color = "Phase:", title = "Stone formulas", subtitle = "on width 3 chain, #stones = #nodes / 2")
ggsave(filename = "plots/phase.pdf", units = "cm", device = "pdf", width = 10, height = 5.2)
plot + theme(legend.position = "bottom", legend.direction = "horizontal")

---
title: "Plots for Seeking Practical CDCL Insights from Theoretical SAT Benchmarks"
output: html_notebook
---

```{r}
library(dplyr)
library(ggplot2);
library(RMySQL)
db <- dbConnect(MySQL(), host='msql01.csc.kth.se', user='giraldez_user', password='ukbm5FIHvNuk', dbname='giraldez')
```

Custom Theme (based on https://rpubs.com/Koundy/71792)
```{r}
theme_Publication <- function(base_size=10, base_family="Helvetica") {
      library(grid)
      library(ggthemes)
      (theme_foundation(base_size=base_size, base_family=base_family)
       + theme(plot.title = element_text(face = "bold",
                                         size = rel(1), hjust = 0.5),
               plot.subtitle = element_text(hjust = 0.5),
               text = element_text(),
               panel.background = element_rect(colour = NA),
               plot.background = element_rect(colour = NA),
               panel.border = element_rect(colour = NA),
               axis.title = element_text(face = "bold",size = rel(1)),
               axis.title.y = element_text(angle=90,vjust =2),
               axis.title.x = element_text(vjust = -0.2),
               axis.text = element_text(), 
               axis.line = element_line(colour="black"),
               axis.ticks = element_line(),
               panel.grid.major = element_line(colour="#f0f0f0"),
               panel.grid.minor = element_blank(),
               legend.key = element_rect(colour = NA),
               legend.position = "bottom",
               legend.direction = "horizontal",
               legend.key.size= unit(0.2, "cm"),
               legend.spacing = unit(0, "cm"),
               legend.title = element_text(face = "italic", size = rel(1)),
               plot.margin=unit(c(1,2,1,2),"mm"),
               strip.background=element_rect(colour="#f0f0f0",fill="#f0f0f0"),
               strip.text = element_text(face="bold")
          ))
      
}

theme_set(theme_Publication())
scale_colour_discrete <- function(...) scale_color_brewer(..., palette="Set2")
legendInBox <- function(pos = c(0,0)) theme(legend.position = pos, legend.direction = "vertical", legend.box.background = element_rect(colour = "#000000"), legend.key.size = unit(0.9, "lines"))
update_geom_defaults("point", list(size = 0.7))
```


# CA:VSIDS vs CA:RND

```{r}
df <- dbGetQuery(db, "
  SELECT * FROM Experiment as main
  inner join Instance
  	on Instance.`ID-Instance` = main.`ID-Instance`
  inner join Configuration
  	on Configuration.`ID-configuration` = main.`ID-configuration`
where
 `RE-value` in ('lbd', 'luE3')
 and `VD-value` in ('df95', 'df80')
 and `PH-value` in ('std')
 and `CE-value` in ('lin', 'glu', 'min')
 and `CA-value` in ('vsids', 'lbd', 'rnd')
 and `CL-value` in ('1uip')
 and `PR-value` in ('on', 'off')
 and `SH-value` in ('off');")
df$`RE-value` <- as.factor(df$`RE-value`)
df$`VD-value` <- as.factor(df$`VD-value`)
df$`PH-value` <- as.factor(df$`PH-value`)
df$`CE-value` <- as.factor(df$`CE-value`)
df$`CA-value` <- as.factor(df$`CA-value`)
df$`CL-value` <- as.factor(df$`CL-value`)
df$`PR-value` <- as.factor(df$`PR-value`)
df$`SH-value` <- as.factor(df$`SH-value`)
df <- df[, !duplicated(colnames(df))]
```

```{r}
library(ggplot2)
data = df %>% filter(Subfamily == "domset_4", `RE-value` == "lbd", `VD-value` == "df95", `PH-value` == "std", `CE-value` == "glu", `PR-value` == "on")

plot <- ggplot(data, aes(
  x = `Variables`,
  y = `CPU-time`,
  color = `CA-value`)) + geom_point() + geom_line() +
  labs(x = "Number of variables", y = "CPU time", color = "Clause assessment", title = "Dominating set formulas (k=4)") +
  legendInBox(c(0.25,0.6))

ggsave(filename = "plots/ca_rnd_vs_vsidis_single.pdf", units = "cm", device = "pdf", width = 10, height = 5)
plot
```

```{r}
data = df %>% filter()
timeouts = data %>% 
  group_by(Subfamily, `RE-value`, `VD-value`, `PH-value`, `CE-value`, `CA-value`, `CL-value`, `PR-value`, `SH-value`) %>% 
  summarize(
    timeout = sum(`Solver-answer` == "INDETERMINATE"),
    outofmem = sum(`Solver-answer` == "OUTOFMEMORY" ),
    unsolved = sum(`Solver-answer` %in% c("OUTOFMEMORY", "INDETERMINATE") ),
    n = n())

rndVsLbd = inner_join(
  timeouts %>% filter(`CA-value` == "rnd"),
  timeouts %>% filter(`CA-value` == "lbd"),
  by = c("Subfamily", "RE-value", "VD-value", "PH-value", "CE-value", "CL-value", "PR-value", "SH-value"),
  suffix = c(".rnd", ".other"))
rndVsLbd = rndVsLbd %>% mutate(diff = unsolved.rnd - unsolved.other, other = factor("lbd", c("lbd", "vsids")))

rndVsVsids = inner_join(
  timeouts %>% filter(`CA-value` == "rnd"),
  timeouts %>% filter(`CA-value` == "vsids"),
  by = c("Subfamily", "RE-value", "VD-value", "PH-value", "CE-value", "CL-value", "PR-value", "SH-value"),
  suffix = c(".rnd", ".other"))
rndVsVsids = rndVsVsids %>% mutate(diff = unsolved.rnd - unsolved.other, other = factor("vsids", c("lbd", "vsids")))

data = union(rndVsLbd, rndVsVsids)
           
print("lbd - rnd")
summary(data %>% filter(other == "lbd") %>% pull(diff))
print("vsids - rnd")
summary(data %>% filter(other == "vsids") %>% pull(diff))
plot <- ggplot(data, aes(x = `other`, y = `diff`)) +
  geom_boxplot(outlier.shape = 4) +
  stat_boxplot(geom ='errorbar', width = 0.2) +
  labs(x = "", y = "Difference in number of timeouts", title = "All formula families") +
  scale_x_discrete(labels = c('random - LBD','random - activity-based')) + 
  theme(plot.margin=unit(c(2,2,-3,2),"mm")) +
ggsave(filename = "plots/ca_rnd_boxplot.pdf", units = "cm", device = "pdf", width = 10, height = 6.5)
plot
```

Each data point is a configuration for a subfamily, the difference is optained by only varying the clause assesment strategie.

The plot shows that there is a difference between vsids and rnd, especially the lower quartiel on the difference is zero,
i.e. in less then 25% of the cases rnd is better (for no differnece one would expect more). Especially, when comparing
this difference to lbd, it shows that it is not very relevant.


# Decay Factor

Load Variable Decay Batch

```{r}
df <- dbGetQuery(db, "
  SELECT * FROM Experiment as main
  inner join Instance
  	on Instance.`ID-Instance` = main.`ID-Instance`
  inner join Configuration
  	on Configuration.`ID-configuration` = main.`ID-configuration`
where
     `RE-value` in ('lbd', 'luE3')
 and `VD-value` in ('df99', 'df95', 'df80', 'df65', 'rnd', 'lrb')
 and `PH-value` in ('fix0', 'std')
 and `CE-value` in ('glu', 'min')
 and `CA-value` in ('vsids', 'lbd')
 and `CL-value` in ('1uip')
 and `PR-value` in ('on', 'off')
 and `SH-value` in ('off');")
df$`RE-value` <- as.factor(df$`RE-value`)
df$`VD-value` <- as.factor(df$`VD-value`)
df$`PH-value` <- as.factor(df$`PH-value`)
df$`CE-value` <- as.factor(df$`CE-value`)
df$`CA-value` <- as.factor(df$`CA-value`)
df$`CL-value` <- as.factor(df$`CL-value`)
df$`PR-value` <- as.factor(df$`PR-value`)
df$`SH-value` <- as.factor(df$`SH-value`)
df <- df[, !duplicated(colnames(df))]
```


```{r}
data = df %>% filter(Subfamily == "op_partial", `CE-value` == "glu", `RE-value` == "lbd", `PH-value` == "std", `PR-value` == "off", `CA-value` == "lbd", !`VD-value` %in% c("lrb", "rnd"))

plot <- ggplot(data, 
       aes(x = `Variables`,
           y = `CPU-time`, color = `VD-value`)
       ) + 
  geom_point() + geom_line() + 
  legendInBox(c(0.75,0.53)) +
  labs(x = "Number of variables", y = "CPU time", color = "VSIDS decay factor", title = "Partial ordering principle formulas") +
  scale_colour_discrete(labels = c("0.65", "0.80", "0.95", "0.99"))
ggsave(filename = "plots/vd_op_partial.pdf", units = "cm", device = "pdf", width = 10, height = 5.2)
plot
```
## LRB

```{r}
data = df %>% filter()
timeouts = data %>% 
  group_by(Subfamily, `RE-value`, `VD-value`, `PH-value`, `CE-value`, `CA-value`, `CL-value`, `PR-value`, `SH-value`) %>% 
  summarize(
    timeout = sum(`Solver-answer` == "INDETERMINATE"),
    outofmem = sum(`Solver-answer` == "OUTOFMEMORY" ),
    unsolved = sum(`Solver-answer` %in% c("OUTOFMEMORY", "INDETERMINATE") ),
    n = n())

lrbVs65 = inner_join(
  timeouts %>% filter(`VD-value` == "lrb"),
  timeouts %>% filter(`VD-value` == "df65"),
  by = c("Subfamily", "RE-value", "PH-value", "CE-value", "CL-value", "PR-value", "SH-value", "CA-value"))
lrbVs65 = lrbVs65 %>% mutate(diff = unsolved.x - unsolved.y, other = "df65")

lrbVs99 = inner_join(
  timeouts %>% filter(`VD-value` == "lrb"),
  timeouts %>% filter(`VD-value` == "df99"),
  by = c("Subfamily", "RE-value", "PH-value", "CE-value", "CL-value", "PR-value", "SH-value", "CA-value"))
lrbVs99 = lrbVs99 %>% mutate(diff = unsolved.x - unsolved.y, other = "df99")

df99Vsdf65 = inner_join(
  timeouts %>% filter(`VD-value` == "df99"),
  timeouts %>% filter(`VD-value` == "df65"),
  by = c("Subfamily", "RE-value", "PH-value", "CE-value", "CL-value", "PR-value", "SH-value", "CA-value"))
df99Vsdf65 = df99Vsdf65 %>% mutate(diff = unsolved.x - unsolved.y, other = "df")


data <- union(lrbVs65, lrbVs99)
data <- union(data, df99Vsdf65)
           
print("df65 - lrb")
summary(data %>% filter(other == "df65") %>% pull(diff))
print("df99 - lrb")
summary(data %>% filter(other == "df99") %>% pull(diff))
print("df99 - df65")
summary(data %>% filter(other == "df") %>% pull(diff))

plot <- ggplot(data, aes(x = `other`, y = `diff`)) +
  geom_boxplot(outlier.shape = 4) +
  stat_boxplot(geom ='errorbar', width = 0.2) +
  labs(x = "", y = "Difference in number of timeouts", title = "All formula families") +
  theme(plot.margin=unit(c(2,2,-3,2),"mm")) +
  scale_x_discrete(labels = c('VSIDS .99 - .65', 'LRB - VSIDS .65','LRB - VSIDS .99'))
ggsave(filename = "plots/vd_lrb_vs_vsids.pdf", units = "cm", device = "pdf", width = 10, height = 6.5)
plot
```


# Restarts

Load main Batch

```{r}
library(RMySQL)
df <- dbGetQuery(db, "
  SELECT * FROM Experiment as main
  inner join Instance
  	on Instance.`ID-Instance` = main.`ID-Instance`
  inner join Configuration
  	on Configuration.`ID-configuration` = main.`ID-configuration`
where
 `RE-value` in ('no', 'lbd', 'luE2', 'luE3')
 and `VD-value` in ('df95', 'df80', 'lrb')
 and `PH-value` in ('fix0', 'std')
 and `CE-value` in ('no', 'lin', 'glu', 'min')
 and `CA-value` in ('vsids', 'lbd', 'none')
 and `CL-value` in ('1uip')
 and `PR-value` in ('on', 'off')
 and `SH-value` in ('off');")
df$`RE-value` <- as.factor(df$`RE-value`)
df$`VD-value` <- as.factor(df$`VD-value`)
df$`PH-value` <- as.factor(df$`PH-value`)
df$`CE-value` <- as.factor(df$`CE-value`)
df$`CA-value` <- as.factor(df$`CA-value`)
df$`CL-value` <- as.factor(df$`CL-value`)
df$`PR-value` <- as.factor(df$`PR-value`)
df$`SH-value` <- as.factor(df$`SH-value`)
df <- df[, !duplicated(colnames(df))]
```

```{r}
data = df %>% filter(Subfamily == "stone_width3chain_nhalfmarkers", `CE-value` == "glu", `PH-value` == "std", `PR-value` == "off", `CA-value` == "lbd", `VD-value` == "df95")

plot <- ggplot(data, 
       aes(x = `Variables`,
           y = `CPU-time`, color = `RE-value`)
       ) + 
  geom_point() + geom_line() + 
  theme(legend.position = c(1.2,0.5), legend.direction = "vertical", legend.key.size = unit(1, "lines"), plot.margin=unit(c(1,25,1,2),"mm")) +
  scale_colour_discrete(labels = c("LBD", "Luby 100", "Luby 1000", "no restarts")) +
  labs(x = "Number of variables", y = "CPU time", color = "Restarts", title = "Stone formulas", subtitle = "on width 3 chain, #stones = #nodes / 2")
ggsave(filename = "plots/restarts.pdf", units = "cm", device = "pdf", width = 10, height = 5.2)
plot + theme(legend.position = "bottom", legend.direction = "horizontal")
```

```{r}
data = df %>% filter(Subfamily == "peb_pyrofpyr_neq3", `CE-value` == "glu", `PH-value` == "std", `PR-value` == "off", `CA-value` == "lbd", `VD-value` == "df80")

plot <- ggplot(data, 
       aes(x = `Variables`,
           y = `CPU-time`, color = `RE-value`)
       ) + 
  geom_point() + geom_line() + 
  scale_colour_discrete(labels = c("LBD", "Luby 100", "Luby 1000", "no restarts")) +
  theme(legend.position = c(1.2,0.5), legend.direction = "vertical", legend.key.size = unit(1, "lines"), plot.margin=unit(c(1,25,1,2),"mm")) +
  labs(x = "Number of variables", y = "CPU time", color = "Restarts", title = "Pebbling on pyramid-of-pyramids graphs")
ggsave(filename = "plots/restarts2.pdf", units = "cm", device = "pdf", width = 10, height = 5)
plot + theme(legend.position = "bottom", legend.direction = "horizontal")
```


# Database Size
```{r}
data = df %>% filter(Subfamily == "tseitin_reggrid_5", `CA-value` == "lbd", `RE-value` == "lbd", `VD-value` == "df80", `PH-value` == "std", `PR-value` == "off")

ggplot(data, 
       aes(x = `Variables`,
           y = `CPU-time`, color = `CE-value`, group = `ID-configuration`)
       ) + 
  geom_point() + geom_line() +
  labs(x = "Number of variables", y = "CPU time", color = "Clause erasure:")

ggplot(data %>% filter(`Solver-answer` == "UNSATISFIABLE"), 
       aes(x = `Variables`,
           y = `Conflicts` / 1000000, color = `CE-value`, group = `ID-configuration`)
       ) + 
  geom_point() + geom_line() +
  labs(x = "Number of variables", y = "Million conflicts")
```


## Fancy Arrangement
```{r}
library(gridExtra)
library(cowplot)
data = df %>% filter(Subfamily == "tseitin_reggrid_5", `CA-value` == "lbd", `RE-value` == "lbd", `VD-value` == "df80", `PH-value` == "std", `PR-value` == "off")

plot1 <- ggplot(data, 
       aes(x = `Variables`,
           y = `CPU-time`, color = `CE-value`, group = `ID-configuration`)
       ) + 
  geom_point() + geom_line() +
  theme(plot.margin=unit(c(0,2,0,2),"mm"), legend.position = "bottom") +
  scale_colour_discrete(labels = c("glucose", "linear", "minisat")) +
  labs(x = "Number of variables", y = "CPU time", color = "Clause erasure:")

legend <- get_legend(plot1)
plot1 <- plot1 + theme(legend.position = "none")

plot2 <- ggplot(data %>% filter(`Solver-answer` == "UNSATISFIABLE"), 
       aes(x = `Variables`,
           y = `Conflicts` / 1000000, color = `CE-value`, group = `ID-configuration`)
       ) + 
  geom_point() + geom_line() +
  theme(plot.margin=unit(c(0,2,0,3),"mm"), legend.position = "none")+
  labs(x = "Number of variables", y = "Million conflicts")

titlePlot <- ggplot() + labs(title = "Tseitin formulas on grid graphs (5 rows)") +
  theme(plot.margin=unit(c(2,2,0,2),"mm"))

arrange <- arrangeGrob(plot1,plot2, ncol = 2)
arrange <- arrangeGrob(titlePlot, arrange, legend, heights = c(0.8,5,0.8) ,nrow = 3)
    
ggsave(filename = "plots/clauseErasure.pdf", arrange, units = "cm", device = "pdf", width = 10, height = 6)
```

# Phase Saving

Load Phase Saving Batch

```{r}
df <- dbGetQuery(db, "
  SELECT * FROM Experiment as main
  inner join Instance
  	on Instance.`ID-Instance` = main.`ID-Instance`
  inner join Configuration
  	on Configuration.`ID-configuration` = main.`ID-configuration`
where
    `Subfamily` = 'stone_width3chain_nhalfmarkers'
 and `RE-value` in ('lbd', 'luE3')
 and `VD-value` in ('df95', 'df80')
 and `PH-value` in ('fix0', 'std', 'dynrnd', 'fixrnd', 'ctr')
 and `CE-value` in ('glu', 'min')
 and `CA-value` in ('vsids', 'lbd')
 and `CL-value` in ('1uip')
 and `PR-value` in ('on', 'off')
 and `SH-value` in ('off');")
df$`RE-value` <- as.factor(df$`RE-value`)
df$`VD-value` <- as.factor(df$`VD-value`)
df$`PH-value` <- as.factor(df$`PH-value`)
df$`CE-value` <- as.factor(df$`CE-value`)
df$`CA-value` <- as.factor(df$`CA-value`)
df$`CL-value` <- as.factor(df$`CL-value`)
df$`PR-value` <- as.factor(df$`PR-value`)
df$`SH-value` <- as.factor(df$`SH-value`)
df <- df[, !duplicated(colnames(df))]
```

```{r}
data = df %>% filter(Subfamily == "stone_width3chain_nhalfmarkers", `CE-value` == "glu", `PR-value` == "off", `CA-value` == "lbd", `VD-value` == "df95", `RE-value` == "lbd")

plot <- ggplot(data, 
       aes(x = `Variables`,
           y = `CPU-time`, color = `PH-value`)
       ) + 
  geom_point() + geom_line() + 
  theme(legend.position = c(1.2,0.5), legend.direction = "vertical", legend.key.size = unit(1.2, "lines"), plot.margin=unit(c(1,25,1,2),"mm")) +
  scale_colour_discrete(labels = c("counter", "dynamic\nrandom", "fix zero", "fix random", "standard")) +
  labs(x = "Number of variables", y = "CPU time", color = "Phase:", title = "Stone formulas", subtitle = "on width 3 chain, #stones = #nodes / 2")
ggsave(filename = "plots/phase.pdf", units = "cm", device = "pdf", width = 10, height = 5.2)
plot + theme(legend.position = "bottom", legend.direction = "horizontal")
```
