You can download a zip file of the code, along with all output images and some R data here (5.4 MB).
The Senate formal preferences files are large – over a gigabyte for NSW – and I did the initial processing in perl. My perl is not overly fluent and doesn't conform to best practices, but it works, and perl flies through a file line-by-line much faster than R.
The perl script below reads the preferences file for a given state, and outputs a CSV of the ATL votes plus four CSVs of graph distances between the groups.
I didn't think I'd have four different distance calculations, and there's a bit of repetition that could be tidied up on that front.
The idea is that the output CSVs are then ready for use in R, though the NSW ATL votes file (used in PCA) was too large for my laptop when it only had 4 GB of RAM. I've since installed another 8 GB of RAM and now R has no problems with it.
# Which state's preferences to process. Set manually here;
# a single command-line argument over-rides it:
$state = "sa";
$state = $ARGV[0] if $#ARGV == 0;
# The input file is the formal preferences by polling place.
# The AEC gives these names like
# SenateStateFirstPrefsByPollingPlaceDownload-20499-NT.csv
# which I've renamed to prefs_nt.csv, prefs_nsw.csv, etc.
# (The leading "<" / ">" in each filename is the open() mode.)
$infile_votes = sprintf("<pref_files/prefs_%s.csv", $state);
# CSV file with my party/group abbreviations:
$infile_groups = sprintf("<groups/%s_groups.csv", $state);
# Output file to store the ATL votes with no other information:
$outfile_atl = sprintf(">atl_votes_%s.csv", $state);
# Output files for the graph distances, defined in various ways:
$outfile_graph_max = sprintf(">graph_distances/distances_max_%s_blankmax.csv", $state);
$outfile_graph_avg = sprintf(">graph_distances/distances_max_%s_blankavg.csv", $state);
$outfile_graph_asym = sprintf(">graph_distances/distances_max_%s_blankmax_asym.csv", $state);
$outfile_graph_asym_wtd = sprintf(">graph_distances/distances_max_%s_blankmax_asym_wtd.csv", $state);
# Number of BTL candidates per state (I could work these out
# later from the data, but it's easier to hard-code them here):
%btl_counts = (
    tas => 58,
    wa  => 79,
    sa  => 64,
    qld => 122,
    nsw => 151,
    vic => 116,
    act => 22,
    nt  => 19,
);
$num_btl = $btl_counts{$state};
# Read the party/group abbreviations from the first column of
# the groups CSV into @atl_groups. A missing file would otherwise
# silently produce an empty group list and nonsense output, so
# die loudly if the open fails.
open(in_file_groups, $infile_groups) or die "Can't open $infile_groups: $!";
# Skip the header line:
$line = <in_file_groups>;
@atl_groups = ();
while (<in_file_groups>) {
$line = $_;
chomp($line);
# The CSV file may have commas in party names, but they
# don't exist in the first column (abbreviations), so no
# need for Text::CSV.
@cells = split(",", $line);
$this_group = $cells[0];
# Strip any quotation marks around the abbreviation:
$this_group =~ s/\"//g;
push(@atl_groups, $this_group);
}
close(in_file_groups);
# Some useful variables for reading the preference lines:
$num_atl = $#atl_groups + 1;       # number of ATL groups
$num_total = $num_atl + $num_btl;  # total preference columns per ballot
$atl_i = $num_atl - 1;             # last ATL column index
$btl_start_i = $num_atl;           # first BTL column index
$btl_end_i = $num_total - 1;       # last BTL column index
# Initialise the "matrix" for each distances graph and the number
# of votes: stored as a hash keyed by row index, each value being
# an array ref holding one row of the matrix.
# (Initialising a hash with "= {}" would store a junk hash-ref
# key; use the empty list. Two of these hashes were previously
# initialised under typo'd names, %distance_asym and
# %distance_asym_wtd, leaving the real hashes uninitialised.)
%distances_max = ();
%distances_avg = ();
%distances_asym = ();
%distances_asym_wtd = ();
%N_votes = ();
%N_votes_asym = ();
%N_not_preffed = ();
for ($i = 0; $i < $num_atl; $i++) {
    # One row of zeros per matrix:
    $distances_max{$i} = [ (0) x $num_atl ];
    $distances_avg{$i} = [ (0) x $num_atl ];
    $distances_asym{$i} = [ (0) x $num_atl ];
    $distances_asym_wtd{$i} = [ (0) x $num_atl ];
    $N_votes{$i} = [ (0) x $num_atl ];
    $N_votes_asym{$i} = [ (0) x $num_atl ];
    $N_not_preffed{$i} = [ (0) x $num_atl ];
}
# Main pass over the preferences file: write the ATL votes to
# their own CSV and accumulate the pairwise distance sums for
# each distance definition. The input is the AEC's *formal*
# preferences file, so every line is a formal vote, either ATL
# or BTL.
open(in_file_votes, $infile_votes);
open(out_file_atl, $outfile_atl);
# Header row of the ATL votes CSV is the group abbreviations:
print out_file_atl join(",", @atl_groups) . "\n";
# First line of the file is the column headers, second
# line is a bunch of hyphens, so skip over these.
$line = <in_file_votes>;
$line = <in_file_votes>;
# Line counter, used only for progress print-outs (starts at 3
# because two header lines were just consumed):
$line_ct = 3;
while(<in_file_votes>) {
$line = $_;
chomp($line);
if ($line_ct % 10000 == 0) {
# Progress update:
print $line_ct . "\n";
}
# The preferences are the only part of the line
# inside quotation marks, so isolate that part:
$pref_part = $line;
$pref_part =~ s/.*?\"//;
$pref_part =~ s/\"//;
# Ticks and crosses count as a first preference, so
# replace / and * with 1:
$pref_part =~ s/\//1/g;
$pref_part =~ s/\*/1/g;
# Split into the ATL (group) columns and BTL (candidate) columns:
@full_prefs = split(",", $pref_part);
@atl_prefs = @full_prefs[0..$atl_i];
@btl_prefs = @full_prefs[$btl_start_i..$btl_end_i];
# Check BTL formality: a formal BTL vote must have each of the
# preferences 1 to 6 appearing exactly once.
$formal_btl = 1;
for ($i = 1; $i <= 6; $i++) {
@this_numbers = grep { $_ == $i; } @btl_prefs;
if ($#this_numbers != 0) {
# Either a duplicated preference or an exhaust by preference 6.
$formal_btl = 0;
last;
}
}
if ($formal_btl == 0) {
# BTL vote is informal, so (the file containing only formal
# votes) this ballot counts as an ATL vote.
# Initialise an array to store the groups
# in preference order:
@this_prefs = ();
# @atl_prefs might have duplicated preferences or
# other irregularities, so initialise a new array
# to be written to file:
@atl_to_print = ();
for ($i = 0; $i < $num_atl; $i++) {
# Initialise to the last preference...
# maybe this would be better left blank
# and processed as needed in R.
push(@atl_to_print, $num_atl);
}
# Walk the ATL preferences in order 1, 2, ...; stop at the
# first duplicated or missing preference number.
for ($i = 1; $i <= $num_atl; $i++) {
@this_numbers = grep { $atl_prefs[$_] == $i; } 0..$#atl_prefs;
if ($#this_numbers != 0) {
# Exhaust or duplicate.
last;
} else {
push(@this_prefs, $this_numbers[0]);
$atl_to_print[$this_numbers[0]] = $i;
}
}
print out_file_atl join(",", @atl_to_print) . "\n";
# Add distances. $this_prim is the group receiving the
# primary (first preference) vote.
$this_prim = $this_prefs[0];
# For all distance types except asym_wtd, first add
# the "not preferenced" distance to all groups,
# then afterwards subtract it from the actually
# preferenced groups.
for ($i = 0; $i < $num_atl; $i++) {
if ($i != $this_prim) {
$distances_max{$this_prim}[$i] += $num_atl - 1;
$distances_max{$i}[$this_prim] += $num_atl - 1;
$distances_avg{$this_prim}[$i] += 0.5*($num_atl + $#this_prefs);
$distances_avg{$i}[$this_prim] += 0.5*($num_atl + $#this_prefs);
$distances_asym{$this_prim}[$i] += $num_atl - 1;
$N_votes{$this_prim}[$i]++;
$N_votes{$i}[$this_prim]++;
$N_votes_asym{$this_prim}[$i]++;
$N_not_preffed{$this_prim}[$i]++;
}
}
# Correct the entries for the groups that actually were
# preferenced: preference $i sits at distance $i from the
# primary group, so adjust away the blanket "not preferenced"
# distance added above.
for ($i = 1; $i <= $#this_prefs; $i++) {
$this_pref = $this_prefs[$i];
$distances_max{$this_prim}[$this_pref] += $i - ($num_atl - 1);
$distances_max{$this_pref}[$this_prim] += $i - ($num_atl - 1);
$distances_avg{$this_prim}[$this_pref] += $i - 0.5*($num_atl + $#this_prefs);
$distances_avg{$this_pref}[$this_prim] += $i - 0.5*($num_atl + $#this_prefs);
$distances_asym{$this_prim}[$this_pref] += $i - ($num_atl - 1);
# For asym_wtd we don't have to subtract off anything:
$distances_asym_wtd{$this_prim}[$this_pref] += $i;
$N_not_preffed{$this_prim}[$this_pref]--;
}
}
$line_ct++;
}
# Finished with the input votes file. (This previously closed a
# non-existent handle, "in_file".)
close(in_file_votes);
# Convert the accumulated distance sums into per-vote averages,
# and prepare the weighting of the not-preferenced votes.
$sum_not_preffed = 0;
for ($i = 0; $i < $num_atl; $i++) {
    for ($j = 0; $j < $i; $j++) {
        if ($N_votes{$i}[$j] > 0) {
            # Symmetric distances: divide by the number of votes
            # that contributed to the pair.
            $distances_max{$i}[$j] /= $N_votes{$i}[$j];
            $distances_max{$j}[$i] /= $N_votes{$j}[$i];
            $distances_avg{$i}[$j] /= $N_votes{$i}[$j];
            $distances_avg{$j}[$i] /= $N_votes{$j}[$i];
            # The asymmetric distances are normalised by the number
            # of primary votes for the row's party; guard against a
            # party with zero ATL primaries (division by zero is
            # fatal in perl):
            if ($N_votes_asym{$i}[$j] > 0) {
                $distances_asym{$i}[$j] /= $N_votes_asym{$i}[$j];
            }
            if ($N_votes_asym{$j}[$i] > 0) {
                $distances_asym{$j}[$i] /= $N_votes_asym{$j}[$i];
            }
            # Re-symmetrise the asymmetric distances:
            $distances_asym{$i}[$j] = 0.5*($distances_asym{$i}[$j] + $distances_asym{$j}[$i]);
            $distances_asym{$j}[$i] = $distances_asym{$i}[$j];
        }
    }
    # Preparation for the weighting of the not-preferenced votes.
    # Careful with the order of the indices here: in braces is the
    # primary vote party; we want to sum a party's not-preferenced
    # votes over all primary vote possibilities, storing the total
    # on the diagonal.
    for ($j = 0; $j < $num_atl; $j++) {
        if ($i != $j) {
            $N_not_preffed{$i}[$i] += $N_not_preffed{$j}[$i];
        }
    }
    $sum_not_preffed += $N_not_preffed{$i}[$i];
}
$mean_not_preffed = $sum_not_preffed / $num_atl;
# Add the weighted not-preferenced votes to the asym_wtd distances:
for ($i = 0; $i < $num_atl; $i++) {
    for ($j = 0; $j < $num_atl; $j++) {
        if ($i != $j) {
            # Guard the two divisors as above, in case a party
            # received no not-preferenced votes or no primaries:
            if ($N_not_preffed{$j}[$j] > 0) {
                $distances_asym_wtd{$i}[$j] += ($num_atl - 1) * $N_not_preffed{$i}[$j] * $mean_not_preffed / $N_not_preffed{$j}[$j];
            }
            if ($N_votes_asym{$i}[$j] > 0) {
                $distances_asym_wtd{$i}[$j] /= $N_votes_asym{$i}[$j];
            }
        }
    }
}
# Re-symmetrise (looping over j < i already covers every pair
# exactly once, so no i != j check is needed):
for ($i = 0; $i < $num_atl; $i++) {
    for ($j = 0; $j < $i; $j++) {
        $distances_asym_wtd{$i}[$j] = 0.5*($distances_asym_wtd{$i}[$j] + $distances_asym_wtd{$j}[$i]);
        $distances_asym_wtd{$j}[$i] = $distances_asym_wtd{$i}[$j];
    }
}
# Write the four distance matrices to CSV: a header row of group
# abbreviations, then one matrix row per group.
open(out_file_graph_max, $outfile_graph_max) or die "Can't open $outfile_graph_max: $!";
open(out_file_graph_avg, $outfile_graph_avg) or die "Can't open $outfile_graph_avg: $!";
open(out_file_graph_asym, $outfile_graph_asym) or die "Can't open $outfile_graph_asym: $!";
open(out_file_graph_asym_wtd, $outfile_graph_asym_wtd) or die "Can't open $outfile_graph_asym_wtd: $!";
print out_file_graph_max join(",", @atl_groups) . "\n";
print out_file_graph_avg join(",", @atl_groups) . "\n";
print out_file_graph_asym join(",", @atl_groups) . "\n";
print out_file_graph_asym_wtd join(",", @atl_groups) . "\n";
for ($i = 0; $i < $num_atl; $i++) {
    # Each matrix "row" is an array ref stored in the hash, so
    # dereference it and join with commas (this replaces the old
    # element-by-element concatenation loop):
    print out_file_graph_max join(",", @{$distances_max{$i}}) . "\n";
    print out_file_graph_avg join(",", @{$distances_avg{$i}}) . "\n";
    print out_file_graph_asym join(",", @{$distances_asym{$i}}) . "\n";
    print out_file_graph_asym_wtd join(",", @{$distances_asym_wtd{$i}}) . "\n";
}
close(out_file_graph_max);
close(out_file_graph_avg);
close(out_file_graph_asym);
close(out_file_graph_asym_wtd);
close(out_file_atl);
Now to R. I have a file called "htv_abbrevs.R" which is just a block of code (rather than a function) which has the Liberal(/National/etc.) and Labor how-to-vote cards – well, only one of them in each state – and a couple of lines to say to use Rise Up Australia instead of One Nation in the territories (used in orienting the second axis).
# Lookup table of how-to-vote (HTV) details per state: the display
# title, the main Coalition party's abbreviation, and the next five
# preferences on the ALP and Liberal(/National/etc.) HTV cards.
htv_details = list(
nsw = list(title = "NSW", lib = "LNP",
alp = c("Grn", "REP", "AJP", "ASXP", "LDP"),
lib_card = c("CDP", "SFF", "FF", "LDP", "AMEP")),
vic = list(title = "Vic", lib = "LNP",
alp = c("ASXP", "DHJP", "REP", "Grn", "S/C"),
lib_card = c("FF", "AC", "DLP", "ACP", "DHJP")),
qld = list(title = "Qld", lib = "LNP",
alp = c("Grn", "S/H", "JLN", "GLT", "KAP"),
lib_card = c("FF", "CDP", "SFF", "KAP", "AC")),
wa = list(title = "WA", lib = "Lib",
alp = c("Grn", "H/S", "DHJP", "SFF", "Nat"),
lib_card = c("Nat", "AC", "SFF", "LDP", "FF")),
sa = list(title = "SA", lib = "Lib",
alp = c("Grn", "AJP", "ME", "NXT", "AMEP"),
lib_card = c("FF", "CDP", "SFF", "AMEP", "NXT")),
tas = list(title = "Tas", lib = "Lib",
alp = c("Grn", "JLN", "ARFP", "S/H", "REP"),
lib_card = c("CDP", "SFF", "LDP", "FF", "ALP")),
act = list(title = "ACT", lib = "Lib",
alp = c("Grn", "ASXP", "AJP", "SPA", "LDP"),
lib_card = c("CDP", "LDP", "AJP", "SA", "ALP")),
nt = list(title = "NT", lib = "CLP",
alp = c("Grn", "H/S", "CDP", "CLP", "CEC"),
lib_card = c("CDP", "ALP", "H/S", "Grn", "RUA"))
)
this_state_htv = htv_details[[state]]
state_title = this_state_htv$title
lib_abbrev = this_state_htv$lib
alp_htv = this_state_htv$alp
lib_htv = this_state_htv$lib_card
# One Nation didn't contest the territories, so use Rise Up
# Australia there instead (used to orient the second axis):
if (state %in% c("act", "nt")) {
phon_rua = "RUA"
} else {
phon_rua = "PHON"
}
# Prepend the lead party to its own card:
alp_htv = c("ALP", alp_htv)
lib_htv = c(lib_abbrev, lib_htv)
Now comes the plotting. First up is the PCA calculation, with all of the hard work done by prcomp().
# PCA of the ATL preference data for one state.
library(dplyr)
library(readr)
library(ggplot2)
library(ggrepel)
# Which state to analyse (determines the input/output filenames):
state = "nsw"
# Garbage collection -- if a large variable was deleted earlier
# in the session, its memory may not yet have been released;
# gc() frees it up (the NSW votes file is big).
gc()
# Defines state_title, lib_abbrev, alp_htv, lib_htv, phon_rua:
source("htv_abbrevs.R")
if (state %in% c("act", "nt")) {
# Only highlight the first three parties
# on the HTV's in the territories.
alp_htv = alp_htv[1:3]
lib_htv = lib_htv[1:3]
}
png_print = function(out_file, img_width, img_height, ggplot_img) {
    # Render a plot object to a PNG file.
    #
    # out_file: output filename; img_width/img_height: size in pixels;
    # ggplot_img: the plot to print. Returns TRUE.
    png(filename=out_file, width=img_width, height=img_height, units="px")
    # Close the graphics device even if print() errors, so a failed
    # plot can't leave a dangling open device:
    on.exit(dev.off(), add=TRUE)
    print(ggplot_img)
    return(TRUE)
}
# Input: the ATL votes CSV written by the perl script
# (one row per ballot, one column per group).
in_file = sprintf("atl_votes_%s.csv", state)
prefs.df = read_csv(in_file)
# The calculation: scaled PCA over the preference columns.
pref_pca = prcomp(prefs.df, scale. = TRUE)
# Eigenvalues are the squared singular values; the cumulative sum
# gives the fraction of variance explained by the first k components:
pca_evals = pref_pca$sdev^2
pca_evals_cumul = cumsum(pca_evals)
fraction_variance = pca_evals_cumul / sum(pca_evals)
# A bit of fiddly work to classify the groups into HTV cards
# ("ALP", "Lib", "Both", or "Neither"), starting from the
# loadings matrix (one row per group):
pca.df = as.data.frame(pref_pca$rotation)
pca.df$name = row.names(pca.df)
pca.df$ALP_HTV = ""
pca.df$Lib_HTV = ""
pca.df$ALP_HTV[which(pca.df$name %in% alp_htv)] = "ALP"
pca.df$Lib_HTV[which(pca.df$name %in% lib_htv)] = "Lib"
pca.df$HTV = sprintf("%s%s", pca.df$ALP_HTV, pca.df$Lib_HTV)
pca.df$HTV = gsub("ALPLib", "Both", pca.df$HTV)
pca.df$HTV[which(pca.df$HTV == "")] = "Neither"
# Orienting the axes (PCA signs are arbitrary): put ALP left of
# the Coalition party, One Nation (or RUA in the territories)
# in the upper half-plane.
ALP_i = which(pca.df$name == "ALP")
LNP_i = which(pca.df$name == lib_abbrev)
PHON_i = which(pca.df$name == phon_rua)
if (pca.df$PC1[ALP_i] > pca.df$PC1[LNP_i]) {
pca.df$PC1 = -pca.df$PC1
}
if (pca.df$PC2[PHON_i] < 0) {
pca.df$PC2 = -pca.df$PC2
}
title_str = sprintf("PCA: %s ATL preferences", state_title)
out_file_png = sprintf("images/pca/%s_pc1_pc2.png", state)
out_file_variance = sprintf("pca_data/%s_fraction_variance.txt", state)
out_file_pca = sprintf("pca_data/%s_df.Rda", state)
# Colour scheme for the HTV-card classification:
htv_colours = c(ALP = "#FF0000", Lib = "#0000FF", Both = "#800080", Neither = "#000000")
# Scatter of the PC1/PC2 loadings, labelled and coloured by card:
circle_plot = ggplot(data=pca.df) +
geom_point(aes(x=PC1, y=PC2, colour=factor(HTV))) +
geom_text_repel(aes(x=PC1, y=PC2, label=name, colour=factor(HTV)), size=6, show.legend=FALSE) +
scale_color_manual(values=htv_colours, name="HTV cards") +
theme_grey(base_size = 16) +
guides(colour = guide_legend(override.aes = list(size=4))) +
theme(legend.text=element_text(size=16)) +
ggtitle(title_str)
# The calculation's not terribly slow, but it's
# quicker to load the factor loadings from disk than
# to calculate it again....
write(fraction_variance, file=out_file_variance, ncolumns = 1)
save("pca.df", file=out_file_pca)
png_print(out_file_png, 600, 500, circle_plot)
And finally the force-directed graphs. This calculation is not memory-intensive like the PCA may be, and I put everything in a big loop and just let it churn away for a while. Plots are translated and rotated so that Labor and the main Coalition party are both at y = 0, equidistant from the origin with Labor on the left. Some number (50 for me) of trials are calculated for each set of parameters, and the locations averaged. The vertical axis is supposed to have One Nation in the upper half-plane, but that is only guaranteed for the first trial in a set. Subsequent trials compare the y-values of all parties to the first trial, and flip the results if that would make for closer agreement.
The iteration from randomised starting locations to equilibrium is inspired by the idea of springs with an equilibrium distance, but the calculations are not real physics – the points don't carry momentum, and it's only the positions that get incremented in proportion to how much "force" they feel.
# Force-directed graph layouts of the party distance matrices.
library(dplyr)
library(readr)
library(ggplot2)
library(ggrepel)
# If you want to plot the convergence from
# the initial randomised locations to
# equilibrium, set to TRUE (slows it down a lot):
plot_intermediate_conv = FALSE
# If you want to plot individual trials as
# they're calculated, set to TRUE
# (they'll be over-written with a fixed set
# of axes afterwards, but it can be nice to
# check that it's running properly):
plot_intermediate = FALSE
# Each trial has a new randomised set of start
# locations. How many trials to do?
num_trials = 50
# The parameters to loop over. blank_types must match the
# "blank..." suffixes of the perl script's output filenames;
# distance_powers are exponents applied to the normalised
# distance matrix.
blank_types = c("max", "avg", "max_asym", "max_asym_wtd")
distance_powers = c(1, 3)
states = c("nsw", "vic", "qld", "wa", "sa", "tas", "act", "nt")
png_print = function(out_file, img_width, img_height, ggplot_img) {
    # Render a plot object to a PNG file.
    #
    # out_file: output filename; img_width/img_height: size in pixels;
    # ggplot_img: the plot to print. Returns TRUE.
    png(filename=out_file, width=img_width, height=img_height, units="px")
    # Close the graphics device even if print() errors, so a failed
    # plot can't leave a dangling open device:
    on.exit(dev.off(), add=TRUE)
    print(ggplot_img)
    return(TRUE)
}
make_plot = function(x, group_names, HTV, htv_colours, title_str, x_lim=NULL, y_lim=NULL) {
# The main plotting routine: scatter of group locations with
# repelled text labels, coloured by HTV-card membership.
# x: num_groups x 2 matrix of coordinates; x_lim/y_lim: optional
# fixed axis limits (NULL lets ggplot choose).
plot_data = data.frame(x = x[, 1], y = x[, 2], names = group_names, HTV = HTV)
p = ggplot()
p = p + geom_point(data = plot_data, aes(x = x, y = y, colour = factor(HTV)))
p = p + geom_text_repel(data = plot_data,
aes(x = x, y = y, label = names, colour = factor(HTV)),
size = 6, show.legend = FALSE)
p = p + scale_x_continuous(limits = x_lim)
p = p + scale_y_continuous(limits = y_lim)
p = p + coord_fixed(ratio = 1)
p = p + scale_color_manual(values = htv_colours, name = "HTV cards")
p = p + theme_grey(base_size = 16)
p = p + guides(colour = guide_legend(override.aes = list(size = 4)))
p = p + theme(legend.text = element_text(size = 16))
p = p + ggtitle(title_str)
return(p)
}
# Colour scheme for the HTV-card classification:
htv_colours = c(ALP = "#FF0000", Lib = "#0000FF", Both = "#800080", Neither = "#000000")
# The big nested loops: every combination of distance power,
# blank-vote definition, and state.
for (distance_power in distance_powers) {
for (blank_type in blank_types) {
# One folder of per-trial output per (blank_type, power) pair:
output_folder = sprintf("images/force_directed/trials/%s_%d", blank_type, distance_power)
dir.create(output_folder, showWarnings = FALSE)
for (state in states) {
print(sprintf("***** Starting %s %s %d *****", state, blank_type, distance_power))
# Defines state_title, lib_abbrev, alp_htv, lib_htv, phon_rua:
source("htv_abbrevs.R")
if (state %in% c("act", "nt")) {
# Only highlight the first three HTV parties in the territories.
alp_htv = alp_htv[1:3]
lib_htv = lib_htv[1:3]
}
# Distance matrix CSV produced by the perl script:
in_file = sprintf("graph_distances/distances_max_%s_blank%s.csv", state, blank_type)
distances.df = read_csv(in_file)
group_names = names(distances.df)
num_groups = length(group_names)
# Embed the groups in two dimensions:
num_dims = 2
# Indices of the parties used to orient the axes:
ALP_i = which(group_names == "ALP")
LNP_i = which(group_names == lib_abbrev)
PHON_i = which(group_names == phon_rua)
# Classify each group by which HTV card(s) it appears on:
ALP_HTV = character(num_groups)
Lib_HTV = character(num_groups)
ALP_HTV[which(group_names %in% alp_htv)] = "ALP"
Lib_HTV[which(group_names %in% lib_htv)] = "Lib"
HTV = sprintf("%s%s", ALP_HTV, Lib_HTV)
HTV = gsub("ALPLib", "Both", HTV)
HTV[which(HTV == "")] = "Neither"
title_str = sprintf("Force-directed, %s,\n%s, d^%d", state_title, blank_type, distance_power)
# Here we go. Normalise the distance matrix so that its
# highest element is 1, and raise all values to the
# current power:
dist_matrix = as.matrix(distances.df)
dist_matrix = dist_matrix / max(dist_matrix)
dist_matrix = dist_matrix^distance_power
# Initialise a matrix to store the locations of each
# trial, and of the mean values:
x = matrix(0, nrow=num_groups, ncol = num_dims)
mean_x = 0*x
# Spring constant:
attraction_const = 1
# Increment value (step size for the position updates):
inc = 5e-2
# Tolerance to decide if it has converged:
tol = 1e-6
# Iteration cap; shouldn't need anywhere near a million:
max_iter = 1000000
# The final co-ordinates will be stored in list_x,
# for retrieval at the end of the trials set:
list_x = list()
# Axis extents across all trials; first row min, second row max:
axis_bounds = matrix(0, nrow=2, ncol=num_dims)
# Here we go.
for (i_trial in 1:num_trials) {
print(sprintf("Starting trial %d", i_trial))
# Randomise the start locations (uniform in [0, 1]):
for (i in 1:num_dims) {
x[ , i] = runif(num_groups)
}
if (plot_intermediate_conv) {
# Snapshot of the random starting arrangement:
locations.df = data.frame(x = x[, 1], y=x[, 2], names=group_names)
this_plot = ggplot() +
geom_text(data=locations.df, aes(x=x, y=y, label=names)) +
ggtitle("Initial")
png_print("images/force_directed/convergence/0000.png", 500, 500, this_plot)
}
# Relax the spring system until positions stop changing:
for (i_iter in 1:max_iter) {
if (i_iter %% 10000 == 0) {
print(i_iter)
}
prev_x = x
# r will store the vectors between party locations,
# one list entry for each vector component, each
# list entry a matrix (i, j).
r = list()
length(r) = num_dims
for (i in 1:num_dims) {
r[[i]] = outer(x[ , i], rep(1, num_groups)) - outer(rep(1, num_groups), x[ , i])
# current_dist is a matrix soon to hold the Euclidean
# distance between each pair of points:
if (i == 1) {
current_dist = r[[i]]*r[[i]]
} else {
current_dist = current_dist + r[[i]]*r[[i]]
}
}
current_dist = sqrt(current_dist)
# Which direction are the springs forcing in?
# (+1 where a pair is further apart than its target
# distance, -1 where closer.)
spring_signs = 2*(current_dist > dist_matrix) - 1
# Calculate forces and increment locations. Force magnitude
# is proportional to the deviation from the target
# separation; positions are incremented directly, with no
# momentum -- not real physics.
for (i in 1:num_dims) {
F_springs_i = -attraction_const * spring_signs * (abs(current_dist - dist_matrix)) * r[[i]]
F_i = rowSums(F_springs_i)
x[ , i] = x[ , i] + inc*F_i
}
if (plot_intermediate_conv) {
out_file = sprintf("images/force_directed/convergence/%04d.png", i_iter)
locations.df = data.frame(x = x[, 1], y=x[, 2], names=group_names)
this_plot = ggplot() +
geom_text(data=locations.df, aes(x=x, y=y, label=names)) +
ggtitle(i_iter)
png_print(out_file, 500, 500, this_plot)
}
# NOTE(review): this is abs(max(...)), not max(abs(...)) --
# a large *negative* position change would not prevent
# "convergence" here; max(abs(x - prev_x)) looks intended.
# Confirm before changing, since it alters stopping times.
if (abs(max(x - prev_x)) < tol) {
# Convergence!
break
}
}
# Translate and rotate so ALP and the main Coalition party
# sit on the x-axis, equidistant from the origin:
ALP_pos = as.vector(x[ALP_i, ])
LNP_pos = as.vector(x[LNP_i, ])
mid_x = 0.5*(ALP_pos[1] + LNP_pos[1])
mid_y = 0.5*(ALP_pos[2] + LNP_pos[2])
delta_x = LNP_pos[1] - ALP_pos[1]
delta_y = LNP_pos[2] - ALP_pos[2]
x[ , 1] = x[ , 1] - mid_x
x[ , 2] = x[ , 2] - mid_y
theta = atan2(delta_y, delta_x)
rot_matrix = matrix(0, nrow=2, ncol=2)
rot_matrix[1, ] = c(cos(theta), sin(theta))
rot_matrix[2, ] = c(-sin(theta), cos(theta))
x = t(rot_matrix %*% t(x))
# Flip secondary axis if necessary, putting One Nation
# (or RUA in the territories) in the upper half-plane:
if (x[PHON_i, 2] < 0) {
x[ , 2] = -x[ , 2]
}
if (i_trial == 1) {
# mean_x is the running *sum* of trial locations; it's
# divided by num_trials after the last trial.
mean_x = x
} else {
# The PHON_i lines above are supposed to ensure that
# One Nation appears in the upper half-plane, but maybe
# it's close to y=0 and it ended up in the lower half-plane.
# So, check whether the current arrangement or its mirror
# image agrees better with the running sum of earlier
# trials, and keep whichever is closer.
test_x = x
test_x[ , 2] = -test_x[ , 2]
diff_1 = sum(abs(mean_x - x))
diff_2 = sum(abs(mean_x - test_x))
if (diff_2 < diff_1) {
x = test_x
}
mean_x = mean_x + x
}
# Write/print to file:
out_file_png = sprintf("%s/%s_%04d.png", output_folder, state, i_trial)
out_file_csv = sprintf("%s/%s_%04d.csv", output_folder, state, i_trial)
if (plot_intermediate) {
scatter_plot = make_plot(x, group_names, HTV, htv_colours, title_str)
png_print(out_file_png, 600, 500, scatter_plot)
}
# Store the final co-ordinates, update the overall
# axis bounds if necessary.
list_x[[i_trial]] = x
if (i_trial == 1) {
for (i in 1:num_dims) {
axis_bounds[1, i] = min(x[ , i])
axis_bounds[2, i] = max(x[ , i])
}
} else {
for (i in 1:num_dims) {
this_min = min(x[, i])
this_max = max(x[ ,i])
if (this_min < axis_bounds[1, i]) {
axis_bounds[1, i] = this_min
}
if (this_max > axis_bounds[2, i]) {
axis_bounds[2, i] = this_max
}
}
}
write(t(x), file = out_file_csv, ncolumns = num_dims, sep = ",")
}
# Re-print the intermediate plots, now with fixed axis extents
# common to all trials of this parameter set:
x_bounds = c(axis_bounds[1, 1], axis_bounds[2, 1])
y_bounds = c(axis_bounds[1, 2], axis_bounds[2, 2])
for (i_trial in 1:num_trials) {
x = list_x[[i_trial]]
out_file = sprintf("%s/%s_%04d.png", output_folder, state, i_trial)
scatter_plot = make_plot(x, group_names, HTV, htv_colours, title_str, x_bounds, y_bounds)
png_print(out_file, 600, 500, scatter_plot)
}
# And now the final plot: the average location over all trials.
mean_x = mean_x / num_trials
out_file = sprintf("images/force_directed/%s_%s_%d.png", state, blank_type, distance_power)
final_plot = make_plot(mean_x, group_names, HTV, htv_colours, title_str)
png_print(out_file, 600, 500, final_plot)
}
}
}
The R code for the force-directed graphs where the axes are optimised sequentially is similar to the above, and is in the download file.
Posted 2016-08-25.