diff --git a/doc/design-paper/node-selection/exit-capacity.R b/doc/design-paper/node-selection/exit-capacity.R new file mode 100644 index 0000000000..ece84e93d0 --- /dev/null +++ b/doc/design-paper/node-selection/exit-capacity.R @@ -0,0 +1,24 @@ +## Read data +t <- read.table("exit-capacity.dat", header=TRUE) + +## Normalize columns +t[,2] <- t[,2]/max(t[,2])*100 +t[,3] <- t[,3]/max(t[,3])*100 + +## Remove uninteresting ports +ports <- c(22, 25, 80, 119, 135, 443, + 563, 8080, 6667) +t <- t[t$port %in% ports,] + +## Plot +pdf("exit-capacity.pdf") +par(las=1) +col <- grey(c(1,4)/5) +barplot(t(as.matrix(t[,2:3])), names=t$port, + beside=TRUE, xlab="Port number", + ylab="Exit capacity available (%)", + col=col, cex.axis=0.8, cex.names=0.8) +par(xpd=TRUE) +legend(x="topright", legend=c("Nodes", "Bandwidth"), + fill=col, bty="n", inset=c(-0.05,-0.15)) +dev.off() diff --git a/doc/design-paper/node-selection/exit-capacity.dat b/doc/design-paper/node-selection/exit-capacity.dat new file mode 100644 index 0000000000..a23f8cf006 --- /dev/null +++ b/doc/design-paper/node-selection/exit-capacity.dat @@ -0,0 +1,14 @@ +port numExits totalBandwidth +21 115 39282983 +22 116 41341901 +25 5 1530934 +80 315 60290055 +119 16 13026366 +135 7 1709809 +443 326 60956345 +445 8 1730289 +563 102 24056338 +1314 307 55315711 +4661 7 1648369 +8080 307 55318361 +6667 116 41961767 diff --git a/doc/design-paper/performance.tex b/doc/design-paper/performance.tex index 41d7ea8dbb..748eaa3f24 100644 --- a/doc/design-paper/performance.tex +++ b/doc/design-paper/performance.tex @@ -21,6 +21,18 @@ \definecolor{MyLightGray}{gray}{.90} \definecolor{MyLightGreen}{rgb}{0.5, 0.99, 0.5} +\usepackage{xspace} +\makeatletter +\newcommand{\ie}{i.e.\@\xspace} +\newcommand{\eg}{e.g.\@\xspace} +\newcommand{\cf}{cf.\@\xspace} +\newcommand{\vs}{vs.\@\xspace} +\newcommand{\wrt}{w.r.t.\@\xspace} +\newcommand{\etal}{\textit{et al.\@\xspace}} +\newcommand{\detal}{\textit{et al.}} +\newcommand{\ia}{inter alia\xspace} +\makeatother + \newcommand{\thetitle}{Performance Improvements on Tor} \title{\thetitle} @@ -39,6 +51,36 @@ \maketitle +\section{Considering exit policy in node selection} + +When selecting an exit node for a circuit, a Tor client will build a list of all exit nodes which can carry the desired stream, then select from them with a probability weighted by each node's capacity\footnote{The actual algorithm is slightly more complex, in particular exit nodes which are also guard nodes will be weighted less, and there is also preemptive circuit creation}. +This means that nodes with more permissive exit policies will be candidates for more circuits, and hence will be more heavily loaded compared to nodes with restrictive policies. + +\begin{figure} +\includegraphics[width=\textwidth]{node-selection/exit-capacity} +\caption{Exit node capacity, in terms of number of nodes and advertised bandwidth for a selection of port numbers.} +\label{fig:exit-capacity} +\end{figure} + +\prettyref{fig:exit-capacity} shows the exit node capacity for a selection of port numbers. +It can be clearly seen that there is a radical difference in the availability of nodes for certain ports, generally those not in the default exit policy. +Any traffic to these ports will be routed through a small number of exit nodes, and if they have a permissive exit policy, they will likely become overloaded from all the other traffic they receive. +The extent of this effect will depend on how much traffic in Tor is to ports which are not in the default exit policy. + +The overloading of permissive exit nodes can be counteracted by adjusting the selection probability of a node based on its exit policy and knowledge of the network load per-port. +While it should improve performance, this modification will make it easier for malicious exit nodes to select traffic they wish to monitor. +For example, an exit node which wants to attack SSH sessions can currently list only port 22 in their exit policy. +Currently they will get a small amount of traffic compared to their capacity, but with the modification they will get a much larger share of SSH traffic. +However a malicious exit node could already do this, by artificially inflating their advertised bandwidth capacity. + +\subsection{Further work} + +To properly balance exit node usage, it is necessary to know the usage of the Tor network, by port. +McCoy \etal have figures for protocol usage in Tor, but these figures were generated through deep packet inspection, rather than by port number. +Furthermore, the exit node they ran used the fairly permissive default exit policy. +Therefore, their measurements will underestimate the relative traffic on ports which are present in the default exit policy, and are also present in more restrictive policies. +To accurately estimate the Tor network usage by port, it is necessary to measure the network usage by port on one or more exit nodes, while simultaneously recording the exit policy of all other exit nodes considered usable. + \section{Altering node selection algorithm} Currently Tor selects nodes with a probability proportional to their bandwidth contribution to the network, however this may not be the optimal algorithm. @@ -126,5 +168,9 @@ Tor makes this difficult because all cells where the payload is controlled by th Also, because the majority of cells sent in Tor are encrypted by a key not known by the attacker, the probability that an attacker can guess what a cell might be is extremely small. The exception is a padding cell, which has no circuit ID and a zero length payload, however Tor does not currently send padding cells, other than as a periodic keep-alive. +\subsection*{Acknowledgements} + +% Mike Perry + \end{document}