\documentclass{article}
\usepackage{fullpage}
\usepackage{amssymb}
\usepackage{graphicx}
\setlength{\parskip}{.4cm}
\setlength{\baselineskip}{15pt}
\setlength{\parindent}{0cm}
\newcounter{probcnt}
\newenvironment{problem}{\stepcounter{probcnt}{\bf Problem \arabic{probcnt}}.}{}
% Sariel's macros
\newcommand{\MakeSBig}{\rule[0.0cm]{0.0cm}{0.35cm}} % really small
\newcommand{\ceil}[1]{\left\lceil {#1} \right\rceil}
\newcommand{\EntropyChar}{\mathbb{H}}
\newcommand{\pth}[2][\!]{#1\left({#2}\right)}
\newcommand{\EntropyX}[1]{\EntropyChar\pth[]{#1}}
\newcommand{\pbrcx}[1]{\left[ {#1} \right]}
\newcommand{\Ex}[1]{\mathop{\mathbf{E}}\!\pbrcx{#1}}
\newcommand{\Prob}[1]{\mathop{\mathbf{Pr}}\!\pbrcx{#1}}
\newcommand{\cardin}[1]{\left| {#1} \right|}
\newcommand{\CF}{\mathcal{F}}
\author{}
\date{}
\begin{document}
\hrule
\begin{center}
{\LARGE \sc Qualifying Examination} \\[1ex]
{\Large \sc Theoretical Computer Science}\\[1ex]
{\sc Friday, February 15, 2008}\\[1ex]
\vspace*{0.5in}
{\Large \sc Part I: Algorithms}
\end{center}
\hrule
\vfill
\begin{center}
\large
\begin{tabular}{|l||p{2in}|}
\hline
& \\
{\bf ID Number} & \\
& \\
\hline
& \\
{\bf Pseudonym} & \\
& \\
\hline
\end{tabular}
\end{center}
\vfill
{\LARGE
\begin{center}
\begin{tabular}{|c|c|c|c|}
\hline
Problem & Maximum Points & Points Earned & Grader\\
\hline
\hline
1 & 50 & & \\ \hline
2 & 50 & & \\ \hline
3 & 50 & & \\ \hline
4 & 50 & & \\ \hline
Total & 200 & & \\ \hline
\end{tabular}
\end{center}
}
~
\newpage
{\bf Instructions:}
\begin{enumerate}
\item This is a closed book exam.
\item The exam is for 3 hours and has four problems of 50 points each.
Read all the problems carefully to see the order in which you want
to tackle them.
\item Write clearly and concisely. You may appeal to some standard
algorithms/facts from text books unless the problem explicitly asks
for a proof of that fact or the details of that algorithm.
\item If you cannot solve a problem, to get partial credit write down
your main idea/approach in a clear and concise way. For example you
can obtain a solution assuming a clearly stated lemma that you
believe should be true but cannot prove during the exam. However,
please do not write down a laundry list of half baked ideas just to
get partial credit.
\end{enumerate}
~\\~\\
May the force be with you.
\newpage
\renewcommand{\labelenumi}{(\Alph{enumi})}
\begin{problem}
\emph{Arithmetic coding} is a standard compression method. In the
case when the string to be compressed is a sequence of biased coin
flips, it can be described as follows. Suppose that we have a
sequence of bits $X = ( X_1, X_2, \ldots, X_n)$, where each $X_i$
is independently $0$ with probability $p$ and $1$ with probability
$1-p$. The sequences can be ordered lexicographically, so for $x=
(x_1, x_2, \ldots, x_n)$ and $y = (y_1, y_2, \ldots, y_n)$, we say
that $x< y$ if $x_i=0$ and $y_i=1$ in the first coordinate $i$
such that $x_i \ne y_i$. If $z(x)$ is the number of zeroes in the
string $x$, then define $p(x) = p^{z(x)}(1-p)^{n-z(x)}$ and
\[
q(x) = \sum_{y < x} p(y).
\]
\begin{enumerate}
\item {\bf [10 points]} Suppose we are given $X = (X_1, X_2, \ldots,
X_n)$. Explain how to compute $q(X)$ in time $O(n)$ (assume that any
reasonable operation on real numbers takes constant time).
\item {\bf [10 points]} Argue that the intervals $\;\left[q(x), q(x) +
p(x) \MakeSBig \right)$ are disjoint subintervals of $[0,1)$.
\item {\bf [15 points]} Given (A) and (B), the sequence $X$ can be
represented by any point in the interval $I(X) =
\left[q(X),\MakeSBig q(X) + p(X)\right)$. Show that we can choose
a codeword in $I(X)$ with $\ceil{ \lg (1/p(X))} + 1$ binary
decimal digits to represent $X$ in such a way that no codeword
is the prefix of any other codeword.
\item {\bf [15 points]} Given a codeword chosen as in (C), explain how
to decompress it to determine the corresponding sequence $(X_1, X_2,
\ldots, X_n)$.
\item {\bf [Bonus Points]} Using the Chernoff inequality, argue that
$\lg(1/p(X))$ is close to $n\EntropyX{p}$ with high
probability. Thus, this approach yields an effective compression
scheme.
\end{enumerate}
\noindent{\textbf{Reminder}}: Chernoff inequality states that for $n$
random (and independent) variables $X_1, \ldots, X_n$, $Y =\sum_i X_i$
and $\mu = \Ex{Y}$, the following holds.
\begin{table}[h]
\begin{tabular}{|l|l|}
\hline
%%%%%%%%%%%%%%%%%%%%%
For $\delta \leq 2e -
1$
&
${\rule[-.2cm]{0cm}{0.7cm}}\Prob{ Y > (1+\delta)\mu } < \exp \pth{-\mu
\delta^2 / 4}$
\\
~~~~ $\delta \geq 2e - 1$ &
$\Prob{ Y > (1+\delta)\mu } < 2^{-\mu (1+\delta)}$
\\
\hline
For $\delta \geq 0\MakeSBig$ &
$\displaystyle
\Prob{ Y < (1-\delta)\mu } < \exp \pth{-\mu\delta^2/2}
$
\\
\hline \hline
\end{tabular}
\end{table}
Also, for a random variable $X$ its \emph{entropy} is
\[
\EntropyX{X} = \sum_x \Prob{X = x} \lg \frac{1}{\Prob{X = x}}.
\]
\end{problem}
\begin{problem}
Given a graph $G=(V,E)$ and two nodes $s,t$, the $s,t$ longest path
problem is to find a {\em simple} path from $s$ to $t$ of the longest
possible length. Suppose a famous researcher has shown that unless
$P=NP$ the $s,t$ longest path problem does not admit a
$2$-approximation. Use this to show that unless $P=NP$ the $s,t$
longest path problem does not admit an $\alpha$-approximation for any
fixed constant $\alpha$.
\end{problem}
\begin{problem}
Let $T$ be a rooted, ordered tree. An \emph{internal edge} of $T$ is
an edge that is not incident to a leaf. A \emph{contraction} of $T$
is any tree obtained from $T$ by contracting a subset of the internal
edges. (See the figure below.) Because only internal edges are
contracted, every contraction of $T$ has the same number of leaves as
$T$.
\begin{center}\footnotesize\sf
\includegraphics[height=1.5in]{ntrees}\\
The middle tree is the largest common contraction of the left and right trees.
\end{center}
Describe and analyze an efficient algorithm to compute the
\emph{largest common contraction} of two given rooted, ordered trees.
Assume both input trees have the same number of leaves.
\end{problem}
\begin{problem}
In this problem you wish to develop a randomized algorithm that given
a directed graph $G=(V,E)$ and an integer $k$ decides whether $G$ has
a simple path of length at least $k$. The algorithm's running time
will be $O(c^k \, \mathrm{poly}(n))$ for some constant $c$. The algorithm consists
of two steps. In the first step the algorithm picks independently for
each vertex $u \in V$ a random color from $\{1,2, \ldots, k\}$
according to the uniform distribution. In the second step the
algorithm finds a longest {\em multi-colored} path --- that is, a path
consisting of vertices with different colors.
\begin{enumerate}
\item {\bf [15 points]} Suppose $P$ is a path of length $k$ (contains
$k$ vertices) in $G$. What is the {\em precise} probability that the
vertices in $P$ get different colors in the first step? Simplify the
expression to show that it is at least $1/e^k$ where $e$ is the base
of the natural logarithm.
\item {\bf [25 points]} Suppose vertices of $G$ are colored using
$\{1,2, \ldots, k\}$. Give an algorithm that finds the longest
multi-colored path in $G$ in $O(c^k \, \mathrm{poly}(n))$ time; $c=4$ should
suffice. {\em Hint: use dynamic programming.}
\item {\bf [10 points]} Show, using (A) and (B), that there is a
randomized algorithm that in $O(c^k \, \mathrm{poly}(n))$ time decides with
high probability whether $G$ has a path of length $k$ or not.
\end{enumerate}
\end{problem}
\underline{\textbf{\textsc{Additional Problems}}}
\begin{problem}
Given an edge-weighted graph $G$, finding the exact
\emph{maximum-weight planar subgraph} of~$G$ is NP-hard. Describe and
analyze a polynomial-time $O(1)$-approximation algorithm for this
problem. For 20 points of partial credit, describe an
$O(1)$-approximation algorithm for \emph{unweighted} graphs.
\end{problem}
\begin{problem}
Let $(X, \CF)$ be a set system, where $\cardin{X} = n$, and
$\cardin{\CF} = m = n^{O(1)}$, where $\CF$ is a set of subsets of
$X$. Assume, that with every set $Y$ of $\CF$ there is an associated
weight $w(Y)$.
We are interested in computing the minimum weight cover of $X$ by a
family of sets in $\CF$. The greedy algorithm performs this by
repeatedly picking the set $Y \in \CF$ such that $w(Y)/m(Y)$ is
minimized, where $m(Y)$ is the number of elements of $X$ that are
newly covered by adding $Y$ to the current cover.
\begin{enumerate}
\item Prove that the greedy algorithm in the worst case provides $O(
\log n)$ approximation to the minimum weight cover of
$(X,\CF)$. (Proving this for the unweighted case would give you
partial credit.)
\item For any $k$ and $n$, provide an example showing that the greedy
algorithm is tight.
\end{enumerate}
\end{problem}
\begin{problem}
Let $G=(V,E)$ be a directed graph representing a flow network.
The edge capacities are given by $c$ with $c(e)$ as the integer
edge capacity. For a set $X$ of nodes, $\delta^+(X)$ and
$\delta^-(X)$ denote the set of edges leaving $X$ and entering $X$
respectively. Let $s,t$ be two distinct nodes, the source and
sink of the flow network. A cut between $s$ and $t$ corresponds
to a set $A$ such that $s \in A$ and $t \not \in A$. The capacity
of the cut $A$ is simply $c(\delta^+(A)) = \sum_{e \in
\delta^+(A)} c(e)$. We are interested in the {\em minimum cuts}
between $s$ and $t$. Call a min-cut $A$ {\em minimal} if there is
no proper subset $B \subset A$ such that $B$ is also a
minimum-cut.
\begin{enumerate}
\item Show that there is a {\em unique} minimal min-cut. {\em
      Hint:} Show and use the fact that if $A$ and $A'$ are cuts
      then $c(\delta^+(A)) + c(\delta^+(A')) \ge c(\delta^+(A \cap A')) +
      c(\delta^+(A \cup A'))$.
\item Give an algorithm to compute the minimal min-cut.
\end{enumerate}
\end{problem}
\begin{problem}
Consider the following optimization problem. The input is a
directed graph $G=(V,E)$ with edge costs $c:E \rightarrow
\mathbb{Z}$ and $k$ node pairs $(s_1,t_1), (s_2,t_2), \ldots,
(s_k,t_k)$. In addition each pair $(s_i,t_i)$ has an integer
requirement $r_i > 0$. The objective is to find a minimum-cost
subset $E' \subseteq E$ of edges such that the graph $G'=(V,E')$
has $r_i$ edge-disjoint paths between $s_i$ and $t_i$ for $1 \le i
\le k$.
\begin{enumerate}
\item Write an integer linear program for the above problem
and sketch a proof that it is correct.
\item Indicate how your formulation would change if the graph
is undirected.
\end{enumerate}
\end{problem}
\begin{problem}
Consider the following random process. At each time step a number from
$\{1, 2, \ldots, n\}$ is picked uniformly at random.
\begin{enumerate}
\item Let $T$ be the time when all numbers $\{1,2, \ldots, n\}$ are
seen. What is the expected value of $T$?
\item Bonus: prove that $\Prob{T > c \, \Ex{T}} < 1/n$ for a sufficiently large
constant $c$.
\end{enumerate}
\end{problem}
\end{document}