% TODO
%   - definite typechecker/compiler implies it only looks at the
%   installed unit database, not TRUE


\documentclass{article}

\usepackage{pifont}
\usepackage{graphicx} %[pdftex] OR [dvips]
\usepackage{fullpage}
\usepackage{wrapfig}
\usepackage{float}
\usepackage{titling}
\usepackage{hyperref}
\usepackage{tikz}
\usepackage{color}
\usepackage{footnote}
\usepackage{float}
\usepackage{algpseudocode}
\usepackage{bigfoot}
\usepackage{amssymb}
\usepackage{amsmath}
\usepackage{framed}

% Alter some LaTeX defaults for better treatment of figures:
% See p.105 of "TeX Unbound" for suggested values.
% See pp. 199-200 of Lamport's "LaTeX" book for details.
%   General parameters, for ALL pages:
\renewcommand{\topfraction}{0.9}	% max fraction of floats at top
\renewcommand{\bottomfraction}{0.8}	% max fraction of floats at bottom
%   Parameters for TEXT pages (not float pages):
\setcounter{topnumber}{2}
\setcounter{bottomnumber}{2}
\setcounter{totalnumber}{4}     % 2 may work better
\setcounter{dbltopnumber}{2}    % for 2-column pages
\renewcommand{\dbltopfraction}{0.9}	% fit big float above 2-col. text
\renewcommand{\textfraction}{0.07}	% allow minimal text w. figs
%   Parameters for FLOAT pages (not text pages):
\renewcommand{\floatpagefraction}{0.7}	% require fuller float pages
% N.B.: floatpagefraction MUST be less than topfraction !!
\renewcommand{\dblfloatpagefraction}{0.7}	% require fuller float pages

% remember to use [htp] or [htpb] for placement

\newcommand{\I}[1]{\ensuremath{\mathit{#1}}}

\newcommand{\optionrule}{\noindent\rule{1.0\textwidth}{0.75pt}}

\newenvironment{aside}
  {\begin{figure}\def\FrameCommand{\hspace{2em}}
   \MakeFramed{\advance\hsize-\width}\optionrule\small}
{\par\vskip-\smallskipamount\optionrule\endMakeFramed\end{figure}}

\setlength{\droptitle}{-6em}

\newcommand{\Red}[1]{{\color{red} #1}}

\title{The Backpack algorithm}

\begin{document}

\maketitle

\noindent Backpack introduces the \emph{Backpack language}, which is
used to define \emph{Backpack units} that provide some modules given
implementations for some signatures.  A Backpack unit can be
immediately typechecked and elaborated into core; at some later point,
each unit may then be compiled one or more times with different
instantiations of its requirements.

A unit is typechecked against the indefinite unit database
(and the installed unit database, for units that have no
requirements) in three steps:

\begin{enumerate}
    \item The \textbf{dependency solver} computes the module and unit
    dependency structure of the declarations in a unit.  Specifically,
    it produces (1) the set of \I{ModuleName}s which are required
    by the unit, (2) a directed acyclic graph labeled by \I{Module} of
    the modules and signatures of the unit, and (3) a directed (and acyclic, for now)
    graph labeled by \I{UnitId} of the includes of the unit.

    \item The \textbf{shaper} takes each module/signature in the DAG and
    computes its \I{Shape}, i.e., the list of \I{AvailInfo}s which are
    provided/required by each module/signature (respectively).  This
    step is interleaved with the next step.

    \item The \textbf{type checker} takes each
    module/signature in the DAG (annotated with its shape) and type
    checks it.  Cabal will save these type checking results
    in the indefinite unit database under the \I{ComponentId} associated
    with this unit.
\end{enumerate}

At some later point in time, a unit may be compiled against
the installed unit database, if the user specifies a mapping which
instantiates the requirements of a unit (a mapping from \I{ModuleName}s to \I{Module}s):

\begin{enumerate}
    \item The \textbf{dependency solver} operates as before, but it also
    is responsible for computing the full set of \I{InstalledUnitId}s which
    must be compiled before this unit can be compiled, and compiling
    them.

    \item The \textbf{shaper} is not needed. (ToDo: except for recursive
    module loops.)

    \item The \textbf{type checker and compiler} takes each
    module/signature in the DAG, and typechecks and compiles the unit.
    Cabal will save the type checking results and object
    code to the installed unit database under the \I{InstalledUnitId}
    associated with this (instantiated) unit.
\end{enumerate}

\Red{ToDo: Rewrite this for added clarity}

\newpage

\section{Front-end syntax}

\begin{figure}[htpb]
$$
\begin{array}{rcll}
p,q,r && \mbox{Component names} \\
m,n   && \mbox{Module names} \\[1em]
\multicolumn{3}{l}{\mbox{\bf Units}} \\
  \I{unit} & ::= & \verb|unit|\; p\; [\I{provreq}]\; \verb|where {| d_1 \verb|;| \ldots \verb|;| d_n \verb|}| \\[1em]
\multicolumn{3}{l}{\mbox{\bf Declarations}} \\
  d & ::= & \verb|module|\;    m \; [exports]\; \verb|where|\; \I{body} \\
    & |   & \verb|signature|\; m \; [exports]\; \verb|where|\; \I{body} \\
    & |   & \verb|include|\; p \; [provreq] \\[1em]
\multicolumn{3}{l}{\mbox{\bf Provides/requires specification}} \\
\I{provreq} & ::= & \verb|(| \, \I{rns} \, \verb|)| \; 
        [ \verb|requires(|\, \I{rns} \, \verb|)| ] \\
\I{rns} & ::= & \I{rn}_0 \verb|,| \, \ldots \verb|,| \, \I{rn}_n [\verb|,|] & \mbox{Renamings} \\
\I{rn} & ::= & m\; \verb|as| \; n & \mbox{Renaming} \\[1em] 
\multicolumn{3}{l}{\mbox{\bf Haskell code}} \\
\I{exports} & & \mbox{A Haskell module export list} \\
\I{body}    & & \mbox{A Haskell module body} \\
\end{array}
$$
\caption{Syntax of Backpack} \label{fig:syntax}
\end{figure}

A (slightly simplified) syntax of Backpack is given in Figure~\ref{fig:syntax}.

\newpage
\section{Unit databases and the unit renamer}

\begin{figure}[htpb]
$$
\begin{array}{rcll}
\multicolumn{3}{l}{\mbox{\bf Identifiers}} \\
  \I{ComponentId} & ::= & \mbox{Opaque unique identifier}  \\
  \I{UnitId} & ::= & \I{ComponentId} \verb|(|\, \I{HoleMap}\, \verb|)|  \\
              & | & \verb|hole| \\
  \I{HoleMap} & ::= & \I{ModuleName}\; \verb|->|\; \I{Module}\; \verb|,|\, \ldots \\
  \I{Module} & ::= & \I{UnitId} \verb|:| \I{ModuleName}  \\
  \I{InstalledUnitId} & ::= & \I{UnitId} \quad \mbox{(with no occurrences of \texttt{hole})} \\[1em]
\multicolumn{3}{l}{\mbox{\bf Unit databases}} \\
  \I{ComponentDb} & ::= & \I{ComponentId} \; \verb|->| \; \I{ComponentRecord} \verb|,|\, \ldots \\
  \I{InstalledUnitDb} & ::= & \I{InstalledUnitId} \; \verb|->| \; \I{InstalledUnitRecord} \verb|,|\, \ldots \\
\end{array}
$$
\caption{Unit identification} \label{fig:ids}
\end{figure}

\subsection{The component and unit database}

To install a package so that it is available when other packages are compiled,
we must record it in some sort of database (which the compiler will query later).
The obvious design for such a database is for it to record \emph{installed packages},
each of which has a collection of object files and interfaces from a compilation.
However, with Backpack, we have to refine this picture in two ways:

\begin{enumerate}
    \item A package is always a unit of distribution: something that has
    single authorship and is uploaded to Hackage.  It would seriously hamper
    modular programming in the small, however, if you always had to create
    a new package to abstract over some requirements.  So we say that a
    package can define multiple \emph{components}.  The \emph{component database}
    records typechecked components.
    \item  In Backpack, it may not be possible to \emph{compile} a component
    at install time: it may depend on some (as yet) unspecified holes.
    A component that is compiled to a specific instantiation is called a
    \emph{unit}.  Thus, we maintain a second \emph{installed unit database}
    which records compiled units; the \emph{component database} contains
    typechecked-only components.  (In practice, these two database are
    stored together, as components with no holes live both in the component
    database and the installed unit database.)
\end{enumerate}
%
Thus, there are four closely related types of identifier to be aware of:

\begin{description}
\item[Component IDs]  A component ID uniquely represents a component
in a Cabal package, including the name and version of the containing package,
the transitive dependencies of the component, and even the build information
for the component.  This ID is opaque to GHC and selected by Cabal
(although GHC may take a component ID and suffix it with a unit name to
derive a new component ID.)  Component IDs identity entries in the
\textbf{component database}, which contains the results of typechecking
a component, but no actual object code.  However, it does contain the
elaborated source, so that it can be built into actual code when
the requirement is filled.

\item[Unit ID]  A unit ID is a component ID augmented with a
\I{HoleMap}, which says how the requirements of the component are
instantiated.  Every component ID induces a unit ID, where each
requirement is filled with a fictitious unit \verb|hole|: when we
typecheck a component for the component database, it is as if we are
``compiling'' it instantiated with holes.  Units instantiated with holes
are never installed; they are strictly for type-checking (although we
could generate code for them, which could be linked if the \verb|hole|
symbols are rewritten to their true destinations).

\item[Installed unit IDs]  An installed unit ID is a unit ID which has
no \verb|hole|s; it identifies a unit that can be compiled.  The
\textbf{installed unit database} caches the compilation results of these
units, so that if a unit is compiled multiple times with the same
instantiation, this code can be reused. (This database most closely
resembles the existing installed package database with GHC today.)
\end{description}

\subsection{The unit renamer}

The unit renamer is responsible for transforming component names in a
Backpack file into \I{ComponentId}s, so that they can be uniquely
identified in the component database.
Given a base \I{ComponentId} of the library component we are compiling (\I{ThisComponentId})
and a mapping from $p$ to \I{ComponentId} (\I{ComponentNameMap}), we rename
as follows:

\begin{itemize}
    \item Every unit declaration $\verb|unit|\; p$ is renamed to $\I{ThisComponentId}\, \verb|-|\, p$.
    \item Every unit include $\verb|include|\; p$ is renamed to $\I{ThisComponentId}\, \verb|-|\, p$ if $p$ was declared in the Backpack file; otherwise it is renamed according to \I{ComponentNameMap}.
\end{itemize}

The \I{ComponentNameMap} is entirely user specified, so there is a great deal
of flexibility on how it can be created, but the convention we expect to
be used by Cabal is that a component name $p$ corresponds to the same-named
unit in a \emph{package} named $p$.  Packages that don't use Backpack
only have one component, the library component, which has the same name as package.

\paragraph{Notational conventions}
In the rest of this document, when it is unambiguous, we will use $p$, $q$ and $r$
interchangeably with \I{ComponentId}, as after unit renaming, there
are no occurrences of component names.

\newpage
\section{Dependency solver}

\begin{figure}[htpb]
$$
\begin{array}{rcll}
  \tilde{d} & ::= & \verb|module|\;    Module \; [exports]\; \verb|where|\; \I{body} \\
    & |   & \verb|signature|\; \I{Module} \; [exports]\; \verb|where|\; \I{body} \\
    & |   & \verb|merge|\; \I{Module} \\
    & |   & \verb|include|\; \I{UnitId} \\
  \I{ComponentRecord}^{\mathsf{dep}} & ::= & \verb|provides:|\; m\; \verb|->|\; \I{Module}\verb|,|\, \ldots\\
    & & \verb|requires:|\; m\verb|,|\, \ldots
\end{array}
$$
\caption{Resolved declarations} \label{fig:resolved}
\end{figure}

The dependency solver computes the unfilled requirements of a component, a
dependency DAG on the modules/signatures in the component, and a dependency
DAG on the includes in the component.  We assume every referenced $p$ in the
component must be recorded in the component database, such that we can
look up $\I{ComponentRecord}^{\mathsf{dep}}$.

\paragraph{Computing unfilled requirements}  The unfilled requirements are $R-P$, where $R$ and $P$ are sets of module names computed from the declarations in the following manner:

\begin{itemize}
    \item $\verb|include|\; p$: union the (domain of the) provisions with $P$ and the requirements with $R$.
    \item $\verb|module|\; m$: add $m$ to $P$.
    \item $\verb|signature|\; m$: add $m$ to $R$.
\end{itemize}

\paragraph{Declaration dependency graph}
We define a graph where the nodes are as described in Figure~\ref{fig:resolved}:
there is a node per
for each module and signature, as well as an extra ``merge'' node for
every unfilled requirement, which merges the interfaces of a local signature and
any requirements brought in from includes.
%
Each node is identified by the tuple $\left(\I{Module}, \I{IsSource?}\right)$, where
the \I{Module} of a declaration $m$ in component $p$ is \verb|p(H):m|, where $H$ maps
each unfilled requirement $n$ to \verb|hole:n|, and \I{IsSource?} is true only for signatures.

The edges of the directed graph signify a ``depends on'' relation, and are
defined as follows:

\begin{itemize}
    \item A module/signature $m$ depends on a module/signature merge $n$ if $m$ imports $n$.
    \item A module/signature $m$ depends on a signature $n$ if $m$ \verb|{-# SOURCE #-}| imports $n$.
    \item A module/signature merge $m$ depends on a local signature $m$ (if it exists).
\end{itemize}
%
If the resulting graph has a cycle, this is an error.

\paragraph{Include dependency graph}  We define an dependency graph
between includes, where an $\verb|include|\; p$ depends on
$\verb|include|\; q$ if, for some module name $m$, $p$ requires $m$ and
$q$ provides $m$.  If there is a cyclic, then there is cross-component
mutual recursion: for now, this is an error.

Assuming an acyclic graph, we can compute the \I{UnitId} of each
key as follows.  Initialize $\Gamma$, a substitution from holes to \I{Module},
to the identity substitution. For each $\verb|include|\; p$ in topological
order, define its \I{UnitId} to be $p$ with the mapping $\Gamma$ with its
domain restricted to the requirements of $p$.  Then, for each provision
$m\; \verb|->|\; \I{Module}$, update $\Gamma$ so that
$\Gamma(m) = \operatorname{subst} (\Gamma, \I{Module})$
(where $\operatorname{subst}$ recursively applies the substitution $\Gamma$ in \I{Module}).

During compilation, the include dependency graph is useful for
determining a compilation order for included units.

\newpage
\section{Requirement calculation}

\Red{to write}

\newpage
\section{Shaping pass}

\begin{figure}[htpb]
$$
\begin{array}{rcll}
\I{Shape} & ::= & \verb|provides:|\; m \; \verb|->|\; \I{Module}\; \I{ModShape} \verb|;|\; \ldots \\
      &     & \verb|requires:| \; m \; \verb|->|\; \textcolor{white}{\I{Module}}\; \I{ModShape}  \verb|;|\; \ldots \\
\I{ModShape} & ::= & \I{AvailInfo}_0 \verb|,|\, \ldots \verb|,|\, \I{AvailInfo}_n \\
\I{AvailInfo} & ::= & \I{Name} & \mbox{Plain identifiers} \\
          & |   & \I{Name} \, \verb|{| \, \I{Name}_0\verb|,| \, \ldots\verb|,| \, \I{Name}_n \, \verb|}| & \mbox{Type constructors} \\
\I{Name}   & ::= & \I{Module} \verb|.| \I{OccName} \\
\I{OccName} & & \mbox{Unqualified name in a namespace} \\
\I{ComponentRecord}^{\mathsf{shape}} & ::= & \I{Shape}
\end{array}
$$
\caption{Shaping} \label{fig:shaping}
\end{figure}

The shaping pass computes the export \I{AvailInfo}s for each node in
the dependency graph; collectively, these form the \I{Shape} of
the unit described in Figure~\ref{fig:shaping}.  Equivalently,
the \I{Shape} of unit specifies what a unit requires and provides
at the Haskell declaration level.

An \I{AvailInfo} names a Haskell declaration that may be exported.
It may be a plain identifier \I{Name}, or it may be a type constructor,
in which case it has children \I{Name}s representing the names of the
data constructors, record selectors, etc.  This level of hierarchy
makes it possible to use ellipses in an import list, e.g. \verb|TyCon(..)|,
to selectively import just the logical children of a type constructor.
Children names have the invariant that they have the same \I{Module} as the parent name.
In a \I{ModShape}/export list,
the \I{OccName}s of the plain identifier \I{AvailInfo}s and the \emph{children}
of type constructor are unique
(although the top-level \I{Name}s may not have unique \I{OccName}s).

The compilation of every node is associated with a ``shape context'',
which represents the modules which are transitively depended upon. Let the
environment shape context is the merge of the shapes of all includes; to
shape a node:

\begin{enumerate}
    \item Merge the environment shape contexts with the shape contexts
    of all direct dependencies, resulting in the initial shape context.
    \item Rename the module/signature according to the initial shape context,
    getting a \I{ModShape}.  Importantly, when renaming the signature \verb|M|,
    any declarations defined in the signature are assigned a \I{Name}s with the \I{Module} \verb|hole:M| (rather than a \I{Module} based on the current unit $p$).
    \item Merge this \I{ModShape} into the initial shape context
    (modules go in provisions while signatures go in provisions), the
    result defining the shape context of this node.
\end{enumerate}

We now elaborate on these steps in more detail.

\subsection{Shapes of includes}
Given an \verb|include p (X) requires (Y)|, we can look up the shape
for $p$ from the indefinite package database.  However, an include can
also rename provisions and requires (where $X$, $Y$ are partial maps
from module name to module name), which requires transforms the shape
in the following way:

\begin{itemize}
    \item For each original provision $m\; \verb|->|\; \ldots$, provide
          $X (m)\; \verb|->|\; \ldots$ if $X (m)$ is defined.
    \item For each original requirement $m\; \verb|->|\; \ldots$, require
          $Y (m)\; \verb|->|\; \ldots$ if $Y (m)$ is defined, and $m$ if it is not.
          (Non-mentioned requirements are always passed through).
    \item For each requirement renaming from \verb|M| to \verb|N| in $Y$, substitute
          all occurrences of \verb|hole:M| to \verb|hole:N| in the \I{ModShape}
          of all provisions and requirements.
\end{itemize}

\subsection{Shape merging}
Before specifying the how to merge shapes algorithm, we must define some subprocedures
for unifying and merging lower-level entities such as \I{AvailInfo}s and \I{Name}s,
which produce \I{Name} substitution that are applied to shapes.

\begin{description}
    \item[Unify two \textit{Name}s]
    (produces a \I{Name} and a \I{Name} substitution) \\
    Error if the names do not have matching \I{OccName}s.  Error if neither name
    is a hole name.  Otherwise, without loss of generality let $m$ be the hole name
    and $n$ the other name, return $n$ and the substitution of $m$ to $n$.
    \item[Merge two sets of \textit{Name}s]
    (produces a set of \I{Name}s and a \I{Name} substitution) \\
    Let two \I{Name}s be related if they have the same \I{OccName}.
    Union the two sets, unifying related names.
    \item[Unify two \textit{AvailInfo}s]
    (produces an \I{AvailInfo} and a \I{Name} substitution) \\
    If both \I{AvailInfo}s are simply
    a \I{Name}, unify the two \I{Name}s.  If both \I{AvailInfo}s are
    $\I{Name}\, \verb|{|\, \I{Name}_0\verb|,|\, \ldots\verb|,|\, \I{Name}_n\, \verb|}|$,
    unify the top-level \I{Name}, apply the substitution to both \I{AvailInfo}s,
    and return the unified \I{Name} with the union of the child names of the
    substituted \I{AvailInfo}s.
    Otherwise, error.
    \item[Merge two sets of \textit{AvailInfo}s]
    (produces a set of \I{AvailInfo}s and a \I{Name} substitution) \\
    Let two \I{AvailInfo}s be related if they both are of the form
    \I{Name} and have matching \I{OccName}s, or if they both are of
    the form
    $\I{Name}\, \verb|{|\, \I{Name}_0\verb|,|\, \ldots\verb|,|\, \I{Name}_n\, \verb|}|$
    and there exists a child name in each which have matching \I{OccName}s.
    Union the two sets, unifying related \I{AvailInfo}s.
    \item[Apply a name substitution on an \textit{AvailInfo}] (produces an \I{AvailInfo}) \\
    Substitute the top-level \I{Name}, which induces a substitution from
    \I{Module} to $\I{Module}'$.  Apply this module substitution to each child
    \I{Name} in the \I{AvailInfo}.

\end{description}
%
Shape merging takes two units with inputs (requirements) and outputs
(provisions) and ``wires'' them up so that outputs feed into inputs. To
merge the shape of $p$ with the shape of $q$:

\begin{enumerate}
    \item \emph{Fill every requirement of $q$ with provided modules from
        $p$.} For each requirement $M$ of $q$ that is provided by $p$,
        substitute each \I{Module} occurrence of \verb|hole:M| with the
        provided $p\verb|(|M\verb|)|$ (however, do \textbf{NOT} substitute the
        top-level \I{Module} in a \I{Name}s), merge the \I{AvailInfo}s and apply
        the resulting \I{Name} substitution, and
        remove the requirement from $q$.  If the \I{AvailInfo}s of the
        provision are not a superset of the required \I{AvailInfo}s,
        error.
    \item If mutual recursion is supported, \emph{fill every requirement
        of $p$ with provided modules from $q$.}
    \item \emph{Merge leftover requirements.}  For each requirement $M$
        of $q$ that is not provided by $p$ but required by $p$, and let
        the new requirement be the merge of
        \I{AvailInfo}s, applying the resulting \I{Name} substitution.
    \item \emph{Add provisions of $q$.} Union the provisions of $p$ and $q$, (lazily) erroring
        if there is a duplicate that doesn't have the same \I{Module}.
\end{enumerate}

\newpage
\section{Indefinite type checker}

\begin{figure}[htpb]
$$
\begin{array}{rcll}
\I{ComponentRecord} & ::= & \I{ModIface}_0 \verb|;|\, \ldots\verb|;|\, \I{ModIface}_n \\[1em]
\multicolumn{3}{l}{\mbox{\bf Module interface}} \\
\I{ModIface} & ::= & \verb|module| \; \I{Module} \; \verb|(| \I{mi\_exports} \verb|)| \; \verb|where| \\
& & \qquad \I{mi\_decls} \\
& & \qquad \I{mi\_insts} \\
& & \qquad \I{dep\_orphs} \\
\I{mi\_exports} & ::= & \I{AvailInfo}_0 \verb|,|\, \ldots \verb|,|\, \I{AvailInfo}_n & \mbox{Export list} \\
\I{mi\_decls} & ::= & \I{IfaceDecl}_0 \verb|;|\, \ldots \verb|;|\, \I{IfaceDecl}_n & \mbox{Defined declarations} \\
\I{mi\_insts} & ::= & \I{IfaceClsInst}_0 \verb|;|\, \ldots \verb|;|\, \I{IfaceClsInst}_n & \mbox{Defined instances} \\
\I{dep\_orphs} & ::= & \I{Module}_0 \verb|;|\, \ldots \verb|;|\, \I{Module}_n & \mbox{Transitive orphan dependencies} \\[1em]
\multicolumn{3}{l}{\mbox{\bf Interface declarations}} \\
\I{IfaceDecl} & ::= & \I{OccName} \; \verb|::| \; \I{IfaceId} \\
              & |   & \verb|data| \; \I{OccName} \; \verb|=| \;\ \I{IfaceData} \\
              & |   & \ldots \\
\I{IfaceClsInst} & & \mbox{A type-class instance} \\
\I{IfaceId} & & \mbox{Interface of top-level binder} \\
\I{IfaceData} & & \mbox{Interface of type constructor} \\
\end{array}
$$
\caption{Module interfaces in GHC} \label{fig:typecheck}
\end{figure}

\Red{This needs updating.}

In general terms,
type checking an indefinite unit (a unit with holes) involves
calculating, for every module, a \I{ModIface} representing the
type/interface of the module in question (which is serialized
to disk).  The general form of these
interface files are described in Figure~\ref{fig:typecheck}; notably,
the interfaces \I{IfaceId}, \I{IfaceData}, etc. contain \I{Name} references,
which must be resolved by
looking up a \I{ModIface} corresponding to the \I{Module} associated
with the \I{Name}. (We will say more about this lookup process shortly.)
For example, given:

\begin{verbatim}
    unit p where
        signature H where
            data T
        module A(S, T) where
            import H
            data S = S T
\end{verbatim}
%
the \I{PkgType} is:

\begin{verbatim}
    module hole:H (hole:H.T) where
        data T -- abstract type constructor
    module THIS:A (THIS:A.S, hole:H.T) where
        data S = S hole:H.T
    -- where THIS = p(H -> hole:H)
\end{verbatim}
%
However, while it is true that the \I{ModIface} is the final result
of type checking, we actually are conflating two distinct concepts: the user-visible
notion of a \I{ModuleName}, which, when imported, brings some \I{Name}s
into scope (or could trigger a deprecation warning, or pull in some
orphan instances\ldots), versus the actual declarations, which, while recorded
in the \I{ModIface}, have an independent existence: even if a declaration
is not visible for an import, we may internally refer to its \I{Name}, and
need to look it up to find out type information.  (A simple case when
this can occur is if a module exports a function with type \verb|T -> T|,
but doesn't export \verb|T|).

\begin{figure}[htpb]
$$
\begin{array}{rcll}
\I{ModDetails} & ::= & \langle\I{md\_types} \verb|;|\; \I{md\_insts}\rangle \\
\I{md\_types}  & ::= & \I{TyThing}_0 \verb|,|\, \ldots\verb|,|\, \I{TyThing}_n \\
\I{md\_insts}  & ::= & \I{ClsInst}_0 \verb|,|\, \ldots\verb|,|\, \I{ClsInst}_n \\[1em]
\multicolumn{3}{l}{\mbox{\bf Type-checked declarations}} \\
\I{TyThing}    &     & \mbox{Type-checked thing with a \I{Name}} \\
\I{ClsInst}    &     & \mbox{Type-checked type class instance} \\
\end{array}
$$
\caption{Semantic objects in GHC} \label{fig:typecheck-more}
\end{figure}

Thus, a \I{ModIface} can be type-checked into a \I{ModDetails}, described in
Figure~\ref{fig:typecheck-more}.  Notice that a \I{ModDetails} is just
a bag of type-checkable entities which GHC knows about.  We
define the \emph{external package state (EPT)} to
simply be the union of the \I{ModDetails}
of all external modules.

Type checking is a delicate balancing act between module
interfaces and our semantic objects.  A \I{ModIface} may get
type-checked multiple times with different hole instantiations
to provide multiple \I{ModDetails}.
Furthermore complicating matters
is that GHC does this resolution \emph{lazily}: a \I{ModIface}
is only converted to a \I{ModDetails} when we are looking up
the type of a \I{Name} that is described by the interface;
thus, unlike usual theoretical treatments of type checking, we can't
eagerly go ahead and perform substitutions on \I{ModIface}s when
they get included.

In a separate compiler like GHC, there are two primary functions we must provide:

\paragraph{\textit{ModuleName} to \textit{ModIface}}  Given a \I{ModuleName} which
was explicitly imported by a user, we must produce a \I{ModIface}
that, among other things, specifies what \I{Name}s are brought
into scope.  This is used by the renamer to resolve plain references
to identifiers to real \I{Name}s.  (By the way, if shaping produced
renamed trees, it would not be necessary to do this step!)

\paragraph{\textit{Module} to \textit{ModDetails}/EPT}  Given a \I{Module} which may be
a part of a \I{Name}, we must be able to type check it into
a \I{ModDetails} (usually by reading and typechecking the \I{ModIface}
associated with the \I{Module}, but this process is involved).  This
is used by the type checker to find out type information on things. \\

There are two points in the type checker where these capabilities are exercised:

\paragraph{Source-level imports}  When a user explicitly imports a
module, the \textit{ModuleName} is mapped to a \textit{ModIface}
to find out what exports are brought into scope (\I{mi\_exports})
and what orphan instances must be loaded (\I{dep\_orphs}).  Additionally,
the \textit{Module} is loaded to the EPT to bring instances from
the module into scope.

\paragraph{Internal name lookup}  During type checking, we may have
a \I{Name} for which we need type information (\I{TyThing}).  If it's not already in the
EPT, we type check and load
into the EPT the \I{ModDetails} of the \I{Module} in the \I{Name},
and then check the EPT again. (\verb|importDecl|)

\subsection{\textit{ModuleName} to \textit{ModIface}}

In all cases, the \I{mi\_exports} can be calculated directly from the
shaping process, which specifies exactly for each \I{ModuleName} in scope
what will be brought into scope.

\paragraph{Modules} Modules are straightforward, as for any
\I{Module} there is only one possibly \I{ModIface} associated
with it (the \I{ModIface} for when we type-checked the (unique) \verb|module|
declaration.)

\paragraph{Signatures} For signatures, there may be multiple \I{ModIface}s
associated with a \I{ModuleName} in scope, e.g. in this situation:

\begin{verbatim}
    unit p where
        signature S where
            data A
    unit q where
        include p
        signature S where
            data B
        module M where
            import S
\end{verbatim}
%
Each literal \verb|signature| has a \I{ModIface} associated with it; and
the import of \verb|S| in \verb|M|, we want to see the \emph{merged}
\I{ModIface}s.  We can determine the \I{mi\_exports} from the shape,
but we also need to pull in orphan instances for each signature, and
produce a warning for each deprecated signature.

\begin{aside}
\textbf{Does hiding a signature hide its orphans.} Suppose that we have
extended Backpack to allow hiding signatures from import.

\begin{verbatim}
    unit p requires (H) where -- H is hidden from import
        module A where
            instance Eq (a -> b) where -- orphan
        signature H {-# DEPRECATED "Don't use me" #-} where
            import A

    unit q where
        include p
        signature H where
            data T
        module M where
            import H                -- warn deprecated?
            instance Eq (a -> b)    -- overlap?
\end{verbatim}

It is probably the most consistent to not pull in orphan instances
and not give the deprecated warning: this corresponds to merging
visible \I{ModIface}s, and ignoring invisible ones.
\end{aside}

\subsection{\textit{Module} to \textit{ModDetails}}

\paragraph{Modules}  For modules, we have a \I{Module} of
the form $\I{p}\verb|(|m\; \verb|->|\; \I{Module}\verb|,|\, \ldots\verb|)|$,
and we also have a unique \I{ModIface}, where each hole instantiation
is $\verb|hole:|m$.

To generate the \I{ModDetails} associated with the specific instantiation,
we have to type-check the \I{ModIface} with the following adjustments:

\begin{enumerate}
    \item Perform a \I{Module} substitution according to the instantiation
          of the \I{ModIface}'s \I{Module}. (NB: we \emph{do}
          substitute \verb|hole:A.x| to \verb|hole:B.x| if we instantiated
          \verb|A -> hole:B|, \emph{unlike} the disjoint
          substitutions applied by shaping.)
    \item Perform a \I{Name} substitution as follows: for any name
          with a unit key that is a $\verb|hole|$,
          substitute with the recorded \I{Name} in the requirements of the shape.
          Otherwise, look up the (unique) \I{ModIface} for the \I{Module},
          and substitute with the corresponding \I{Name} in the \I{mi\_exports}.
\end{enumerate}

\paragraph{Signatures}  For signatures, we have a \I{Module} of the form
$\verb|hole:|m$.  Unlike modules, there are multiple \I{ModIface}s associated with a hole.
We distinguish each separate \I{ModIface} by considering the full \I{UnitId}
it was defined in, e.g. \verb|p(A -> hole:C, B -> q():B)|; call this
the hole's \emph{defining unit key}; the set of \I{ModIface}s for a hole
and their defining unit keys can easily be calculated during shaping.

To generate the \I{ModDetails} associated with a hole, we type-check each
\I{ModIface}, with the following adjustments:

\begin{enumerate}
    \item Perform a \I{Module} substitution according to the instantiation
        of the defining unit key.  (NB: This may rename the hole itself!)
    \item Perform a \I{Name} substitution as follows, in the same manner
        as would be done in the case of modules.
    \item When these \I{ModDetails} are merged into the EPT, some merging
        of duplicate types may occur; a type
        may be defined multiple times, in which case we check that each
        definition is compatible with the previous ones.  A concrete
        type is always compatible with an abstract type.
\end{enumerate}

\paragraph{Invariants} When we perform \I{Name} substitutions, we must be
sure that we can always find out the correct \I{Name} to substitute to.
This isn't obviously true, consider:

\begin{verbatim}
    unit p where
        signature S(foo) where
            data T
            foo :: T
        module M(bar) where
            import S
            bar = foo
    unit q where
        module A(T(..)) where
            data T = T
            foo = T
        module S(foo) where
            import A
        include p
        module A where
            import M
            ... bar ...
\end{verbatim}
%
When we type check \verb|p|, we get the \I{ModIface}s:

\begin{verbatim}
    module hole:S(hole:S.foo) where
        data T
        foo :: hole:S.T
    module THIS:M(THIS:M.bar) where
        bar :: hole:S.T
\end{verbatim}
%
Now, when we type check \verb|A|, we pull on the \I{Name} \verb|p(S -> q():S):M.bar|,
which means we have to type check the \I{ModIface} for \verb|p(S -> q():S):M|.
The un-substituted type of \verb|bar| has a reference to \verb|hole:S.T|;
this should be substituted to \verb|q():S.T|.  But how do we discover this?
We know that \verb|hole:S| was instantiated to \verb|q():S|, so we might try
and look for \verb|q():S.T|.  However, this \I{Name} does not exist because
the \verb|module S| reexports the selector from \verb|A|!  Nor can we consult
the (unique) \I{ModIface} for the module, as it doesn't reexport the relevant
type.

The conclusion, then, is that a module written this way should be disallowed.
Specifically, the correctness condition for a signature is this: \emph{Any \I{Name}
mentioned in the \I{ModIface} of a signature must either be from an external module, or be
exported by the signature}.

\newpage
\section{Installation}

This section defines the syntax for the file-system format of the \I{ComponentDb}.
Like entries in the installed unit database, an entry is a sequence of fields
with values.

Indefinite unit entries share some entries in common with entries in the installed
unit database:

\begin{description}
    \item[\texttt{component-id:}] \I{ComponentId} \newline
        The unique identifier of an installed package.  This combined
        with \texttt{unit-name} uniquely identifies an entry in the
        installed unit database.
    \item[\texttt{exposed:}] \verb|True| or \verb|False| \newline
        Whether or not this unit is exposed, i.e. it is available for
        \verb|include| under its \verb|unit-name| (this is used to compute
        the default \I{ComponentNameMap} when GHC is called by itself).
    \item[\texttt{import-dirs:}] \I{FilePath} \newline
        Where interface files for this unit can be found. (NB: these
        interface files are templates, which contain references to holes
        which we can substitute.)  (There's exactly one.)
    \item[\texttt{exposed-modules:}] \I{ModuleName} or \I{ModuleName} \texttt{from} \I{Module} $\ldots$ \newline
        The set of exposed modules from this unit, including reexports from
        other units.
    \item[\texttt{other-modules:}] \I{ModuleName} $\ldots$ \newline
        Non-exposed modules; there is an interface for each of these
        in the import-dirs. (Redundant, but useful for error reporting.)
\end{description}
%
As well as all non-essential, Cabal-specific metadata; e.g. \texttt{name}, \texttt{version}, \ldots (\texttt{data-dir} and \texttt{haddock} probably)
Here are new entries for indefinite units:

\begin{description}
    \item[\texttt{requires:}] \I{ModuleName} \ldots \newline
        The set of module names which are requirements of this unit.
        (Installed units instead record \texttt{instantiated-with}, which
        specifies how each requirement was instantiated.)  Every
        requirement has an interface in the import-dirs.
    \item[\texttt{source-dir:}] \I{FilePath} \newline
        The path to the original source of the package.
    \item[\texttt{setup-executable:}] \I{FilePath} \newline
        The path to the \texttt{Setup} executable as described by the Cabal
        specification which is capable of building and installing the package
        using \texttt{./Setup instantiate} (this is a new command which lets
        us program how the requirements of the indefinite unit should be filled),
        \texttt{./Setup build}, \texttt{./Setup copy} and
        \texttt{./Setup register}.
    \item[\texttt{package-config:}] \I{FilePath} \newline
        The path to the package configuration saved when the indefinite
        unit was installed.  This should contain all of the relevant configuration
        information necessary to build a package, except how its requirements
        are instantiated.
\end{description}
%
The string representation of \I{Module} is worth remarking upon.  In
this specification, \I{Module} is a recursive data structure.  For
installed packages, it is acceptable to flatten \I{Module} into a
hash representing the \I{UnitId} and the \I{ModuleName}, as the \I{UnitId}
is an \I{InstalledUnitId} which has an entry in the database.  However,
this is unacceptable for indefinite units, because we don't have an
entry per \I{UnitId}.  So, for \I{UnitId}s in the indefinite unit database,
the full tree is written out, subject to this syntax:

\begin{verbatim}
Module ::= UnitId ":" ModuleName
UnitId ::= InstalledPackageId
          [ "/" UnitName "(" HoleMap ")" ]
          | "hole"
HoleMap ::= ModuleName "->" Module "," ...
\end{verbatim}

\section{Appendix: Shaping}

This section clarifies some subtle aspects about shaping.

\subsection{\textit{OccName} is implied by \textit{Name}}
In Haskell, the following is not valid syntax:

\begin{verbatim}
    import A (foobar as baz)
\end{verbatim}
In particular, a \I{Name} which is in scope will always have the same
\I{OccName} (even if it may be qualified.)  You might imagine relaxing
this restriction so that declarations can be used under different \I{OccName}s;
in such a world, we need a different definition of shape:

\begin{verbatim}
    Shape ::=
        provided: ModuleName -> Module { OccName -> Name }
        required: ModuleName ->        { OccName -> Name }
\end{verbatim}
Presently, however, such an \I{OccName} annotation would be redundant: it can be inferred from the \I{Name}.

\subsection{Holes of a unit are a mapping, not a set.}

Why can't the \I{UnitId} just record a
set of \I{Module}s, e.g. $\I{UnitId}\;::= \; p \; \verb|{| \; \I{Module} \; \verb|}|$?  Consider:

\begin{verbatim}
    unit p (A) requires (H1, H2) where
        signature H1(T) where
            data T
        signature H2(T) where
            data T
        module A(A(..)) where
            import qualified H1
            import qualified H2
            data A = A H1.T H2.T

    unit q (A12, A21) where
        module I1(T) where
            data T = T Int
        module I2(T) where
            data T = T Bool
        include p (A as A12) requires (H1 as I1, H2 as I2)
        include p (A as A21) requires (H1 as I2, H2 as I1)
\end{verbatim}
With a mapping, the first instance of \verb|p| has key \verb|p(H1 -> q():I1, H2 -> q():I2)|
while the second instance has key \verb|p(H1 -> q():I2, H2 -> q():I1)|; with
a set, both would have the key \verb|p{q():I1, q():I2}| and be
indistinguishable.

\subsection{Signatures can require a specific entity.}
With requirements like \verb|A -> { hole:A.T, hole:A.foo }|,
why not specify it as \verb|A -> { T, foo }|,
e.g., \verb|required: { ModuleName -> { OccName } }|?  Consider:

\begin{verbatim}
    unit p () requires (A, B) where
        signature A(T) where
            data T
        signature B(T) where
            import T
\end{verbatim}
The requirements of this unit specify that \verb|A.T| $=$ \verb|B.T|; this
can be expressed with \I{Name}s as

\begin{verbatim}
    A -> { hole:A.T }
    B -> { hole:A.T }
\end{verbatim}
But, without \I{Name}s, the sharing constraint is impossible:  \verb|A -> { T }; B -> { T }|. (NB: \verb|A| and \verb|B| could be filled with different modules, they just have
to both export the same \verb|T|.)

\subsection{The \textit{Name} of a value is used to avoid
ambiguous identifier errors.}
We state that two types
are equal when their \I{Name}s are the same; however,
for values, it is less clear why we care.  But consider this example:

\begin{verbatim}
    unit p (A) requires (H1, H2) where
        signature H1(x) where
            x :: Int
        signature H2(x) where
            import H1(x)
        module A(y) where
            import H1
            import H2
            y = x
\end{verbatim}
The reference to \verb|x| in \verb|A| is unambiguous, because it is known
that \verb|x| from \verb|H1| and \verb|x| from \verb|H2| are the same (have
the same \I{Name}.)  If they were not the same, it would be ambiguous and
should cause an error.  Knowing the \I{Name} of a value distinguishes
between these two cases.

\subsection{Holes are linear}
Requirements do not record what \I{Module} represents
the identity of a requirement, which means that it's not possible to assert
that hole \verb|A| and hole \verb|B| should be implemented with the same module,
as might occur with aliasing:

\begin{verbatim}
    signature A where
    signature B where
    alias A = B
\end{verbatim}
%
The benefit of this restriction is that when a requirement is filled,
it is obvious that this is the only requirement that is filled: you won't
magically cause some other requirements to be filled.  The downside is
it's not possible to write a unit which looks for an interface it is
looking for in one of $n$ names, accepting any name as an acceptable linkage.
If aliasing was allowed, we'd need a separate physical shaping context,
to make sure multiple mentions of the same hole were consistent.

\subsection{A unit does not ``provide'' its signatures}
We enforce the invariant that
a provision is always (syntactically) a \verb|module| and a requirement
is always a \verb|signature|.  This means that if you have a requirement
and a provision of the same name, the requirement can \emph{always} be filled
with the provision.

The alternate design, where a unit both requires and provides
its signatures, makes it unclear if a provision
will actually fill a signature.  Consider this example, where
a signature is required and exposed:

\begin{verbatim}
    unit a-sigs (A) requires (A) where -- ***
        signature A where
            data T

    unit a-user (B) requires (A) where
        signature A where
            data T
            x :: T
        module B where
            ...

    unit p where
        include a-sigs
        include a-user
\end{verbatim}
%
When we consider merging in the shape of \verb|a-user|, does the
\verb|A| provided by \verb|a-sigs| fill in the \verb|A| requirement
in \verb|a-user|?  It \emph{should not}, since \verb|a-sigs| does not
actually provide enough declarations to satisfy \verb|a-user|'s
requirement: the intended semantics \emph{merges} the requirements
of \verb|a-sigs| and \verb|a-user|.

What about this example?

\begin{verbatim}
    unit a-sigs (M as A) requires (H as A) where
        signature H(T) where
            data T
        module M(T) where
            import H(T)
\end{verbatim}
%
We rightly should error, since the provision is a module. And in this situation:

\begin{verbatim}
    unit a-sigs (H as A) requires (H) where
        signature H(T) where
            data T
\end{verbatim}
%
The requirements should be merged, but should the merged requirement
be under the name \verb|H| or \verb|A|?

It may still be possible to use the \verb|(A) requires (A)| syntax to
indicate exposed signatures, but this would be a mere syntactic
alternative to \verb|() requires (exposed A)|.

\subsection{Signature visibility, and defaulting}
The simplest formulation of requirements is to have them always be
importable.  One proposed enhancement, however, is to allow some
requirements to be ``non-importable''; that is, they are not visible
to people who include packages.

One simple way of modeling this is to associate each required module
with a flag indicating whether or not it is importable or not.  Then, we
might imagine that an explicit export list could be used to toggle
whether or not a requirement is visible or not.

However, when an export list is absent, we have to pick a default
visibility for a signature.  If we use the same behavior as with
modules, a strange situation can occur:

\begin{verbatim}
    unit p where -- S is visible
        signature S where
            x :: True

    unit q where -- use defaulting
        include p
        signature S where
            y :: True
        module M where
            import S
            z = x && y      -- OK

    unit r where
        include q
        module N where
            import S
            z = y           -- OK
            z = x           -- ???
\end{verbatim}
%
Absent the second signature declaration in \verb|q|, \verb|S.x| clearly
should not be visible in \verb|N|.  However, what ought to occur when this signature
declaration is added?  One interpretation is to say that only some
(but not all) declarations are provided (\verb|S.x| remains invisible);
another interpretation is that adding \verb|S| is enough to treat
the signature as ``in-line'', and all declarations are now provided
(\verb|S.x| is visible).

The latter interpretation avoids having to keep track of providedness
per declarations, and means that you can always express defaulting
behavior by writing an explicit provides declaration on the unit.
However, it has the odd behavior of making empty signatures semantically
meaningful:

\begin{verbatim}
unit q where
    include p
    signature S where
\end{verbatim}
%
%   SPJ: This would be too complicated (if there's yet a third way)

This is pretty complicated, so signature visibility is not currently
planned to be implemented.

\subsection{Tricky \textit{AvailInfo} merging scenarios}

\paragraph{Merging when type constructors are not in scope}

\begin{verbatim}
    signature A1(foo) where
        data A = A { foo :: Int, bar :: Bool }

    signature A2(bar) where
        data A = A { foo :: Int, bar :: Bool }
\end{verbatim}
%
If we merge \verb|A1| and \verb|A2|, are we supposed to conclude that
the types \verb|A1.A| and \verb|A2.A| (not in scope!) are the same?
The answer is no!  Consider these implementations:

\begin{verbatim}
    module A1(A(..)) where
        data A = A { foo :: Int, bar :: Bool }

    module A2(A(..)) where
        data A = A { foo :: Int, bar :: Bool }

    module A(foo, bar) where
        import A1(foo)
        import A2(bar)
\end{verbatim}

Here, \verb|module A1| implements \verb|signature A1|, \verb|module A2| implements \verb|signature A2|,
and \verb|module A| implements \verb|signature A1| and \verb|signature A2| individually
and should certainly implement their merge.  This is why we cannot simply
merge type constructors based on the \I{OccName} of their top-level type;
merging only occurs between in-scope identifiers.

\paragraph{Does merging a selector merge the type constructor?}

\begin{verbatim}
    signature A1(A(..)) where
        data A = A { foo :: Int, bar :: Bool }

    signature A2(A(..)) where
        data A = A { foo :: Int, bar :: Bool }

    signature A2(foo) where
        import A1(foo)
\end{verbatim}
%
Does the last signature, which is written in the style of a sharing constraint on \verb|foo|,
also cause \verb|bar| and the type and constructor \verb|A| to be unified?
Because a merge of a child name results in a substitution on the parent name,
the answer is yes.

\paragraph{Incomplete data declarations}

\begin{verbatim}
    signature A1(A(foo)) where
        data A = A { foo :: Int }

    signature A2(A(bar)) where
        data A = A { bar :: Bool }
\end{verbatim}
%
Should \verb|A1| and \verb|A2| merge?  If yes, this would imply
that data definitions in signatures could only be \emph{partial}
specifications of their true data types.  This seems complicated,
which suggests this should not be supported; however, in fact,
this sort of definition, while disallowed during type checking,
should be \emph{allowed} during shaping. The reason that the
shape we abscribe to the signatures \verb|A1| and \verb|A2| are
equivalent to the shapes for these which should merge:

\begin{verbatim}
    signature A1(A(foo)) where
        data A = A { foo :: Int, bar :: Bool }

    signature A2(A(bar)) where
        data A = A { foo :: Int, bar :: Bool }
\end{verbatim}

\subsection{Subtyping record selectors as functions}

\begin{verbatim}
    signature H(A, foo) where
        data A
        foo :: A -> Int

    module M(A, foo) where
        data A = A { foo :: Int, bar :: Bool }
\end{verbatim}
%
Does \verb|M| successfully fill \verb|H|?  If so, it means that anywhere
a signature requests a function \verb|foo|, we can instead validly
provide a record selector.  This capability seems quite attractive,
although in practice record selectors rarely seem to be abstracted this
way: one reason is that \verb|M.foo| still \emph{is} a record selector,
and can be used to modify a record.  (Many library authors find this
surprising!)

Nor does this seem to be an insurmountable instance of the avoidance
problem:
as a workaround, \verb|H| can equivalently be written as:

\begin{verbatim}
    signature H(foo) where
        data A = A { foo :: Int, bar :: Bool }
\end{verbatim}
%
However, you might not like this, as the otherwise irrelevant \verb|bar| must be mentioned
in the definition.

In any case, actually implementing this `subtyping' is quite complicated, because we can no
longer assume that every child name is associated with a parent name.
The technical difficulty is that we now need to unify a plain identifier
\I{AvailInfo} (from the signature) with a type constructor \I{AvailInfo}
(from a module.)  It is not clear what this should mean.
Consider this situation:

\begin{verbatim}
    unit p where
        signature H(A, foo, bar) where
            data A
            foo :: A -> Int
            bar :: A -> Bool
        module X(A, foo) where
            import H
    unit q where
        include p
        signature H(bar) where
            data A = A { foo :: Int, bar :: Bool }
        module Y where
            import X(A(..)) -- ???
\end{verbatim}

Should the wildcard import on \verb|X| be allowed?
This question is equivalent to whether or not shaping discovers
whether or not a function is a record selector and propagates this
information elsewhere.
If the wildcard is not allowed, here is another situation:

\begin{verbatim}
    unit p where
        -- define without record selectors
        signature X1(A, foo) where
            data A
            foo :: A -> Int
        module M1(A, foo) where
            import X1

    unit q where
        -- define with record selectors (X1s unify)
        signature X1(A(..)) where
            data A = A { foo :: Int, bar :: Bool }
        signature X2(A(..)) where
            data A = A { foo :: Int, bar :: Bool }

        -- export some record selectors
        signature Y1(bar) where
            import X1
        signature Y2(bar) where
            import X2

    unit r where
        include p
        include q

        -- sharing constraint
        signature Y2(bar) where
            import Y1(bar)

        -- the payload
        module Test where
            import M1(foo)
            import X2(foo)
            ... foo ... -- conflict?
\end{verbatim}

Without the sharing constraint, the \verb|foo|s from \verb|M1| and \verb|X2|
should conflict.  With it, however, we should conclude that the \verb|foo|s
are the same, even though the \verb|foo| from \verb|M1| is \emph{not}
considered a child of \verb|A|, and even though in the sharing constraint
we \emph{only} unified \verb|bar| (and its parent \verb|A|).  To know that
\verb|foo| from \verb|M1| should also be unified, we have to know a bit
more about \verb|A| when the sharing constraint performs unification;
however, the \I{AvailInfo} will only tell us about what is in-scope, which
is \emph{not} enough information.

\subsection{Some examples}

\subsubsection{A simple example}

In the following set of units:

\begin{verbatim}
    unit p(M) requires (A) where
        signature A(T) where
            data T
        module M(T, S) where
            import A(T)
            data S = S T

    unit q where
        module A where
            data T = T
        include p
\end{verbatim}

When we \verb|include p|, we need to merge the partial shape
of \verb|q| (with just provides \verb|A|) with the shape
of \verb|p|.  Here is each step of the merging process:

\begin{verbatim}
          shape 1                       shape 2
          --------------------------------------------------------------------------------
(initial shapes)
provides: A -> THIS:A { q():A.T }       M -> p(A -> hole:A) { hole:A.T, p(A -> hole:A).S }
requires: (nothing)                     A ->                { hole:A.T }

(after filling requirements)
provides: A -> THIS:A { q():A.T }       M -> p(A -> THIS:A) { q():A.T, p(A -> THIS:A).S }
requires: (nothing)                     (nothing)

(after adding provides)
provides: A -> THIS:A         { q():A.T }
          M -> p(A -> THIS:A) { q():A.T, p(A -> THIS:A).S }
requires: (nothing)
\end{verbatim}

Notice that we substituted \verb|hole:A| with \verb|THIS:A|, but \verb|hole:A.T| with \verb|q():A.T|.

\subsubsection{Requirements merging can affect provisions}

When a merge results in a substitution, we substitute over both
requirements and provisions:

\begin{verbatim}
    signature H(T) where
        data T
    module A(T) where
        import H(T)
    module B(T) where
        data T = T

    -- provides: A -> THIS:A { hole:H.T }
    --           B -> THIS:B { THIS:B.T }
    -- requires: H ->        { hole:H.T }

    signature H(T, f) where
        import B(T)
        f :: a -> a

    -- provides: A -> THIS:A { THIS:B.T }           -- UPDATED
    --           B -> THIS:B { THIS:B.T }
    -- requires: H ->        { THIS:B.T, hole:H.f } -- UPDATED
\end{verbatim}

\subsubsection{Sharing constraints}

Suppose you have two signature which both independently define a type,
and you would like to assert that these two types are the same.  In the
ML world, such a constraint is known as a sharing constraint.  Sharing
constraints can be encoded in Backpacks via clever use of reexports;
they are also an instructive example for signature merging.

\begin{verbatim}
    signature A(T) where
        data T
    signature B(T) where
        data T

    -- requires: A -> { hole:A.T }
                 B -> { hole:B.T }

    -- the sharing constraint!
    signature A(T) where
        import B(T)
    -- (shape to merge)
    -- requires: A -> { hole:B.T }

    -- (after merge)
    -- requires: A -> { hole:A.T }
    --           B -> { hole:A.T }
\end{verbatim}
%
\Red{I'm pretty sure any choice of \textit{Name} is OK, since the
subsequent substitution will make it alpha-equivalent.}

\subsection{Export declarations}

If an explicit export declaration is given, the final shape is the
computed shape, minus any provisions not mentioned in the list, with the
appropriate renaming applied to provisions and requirements.  (Requirements
are implicitly passed through if they are not named.)
If no explicit export declaration is given, the final shape is
the computed shape, including only provisions which were defined
in the declarations of the unit.


\section{Cabal}


%  \I{InstalledUnitId} & ::= & \I{ComponentId} \verb|(| \, m \; \verb|->| \; \I{Module} \verb|,|\, \ldots\, \verb|)| & \mbox{Also known as \I{UnitId}} \\
%  \I{Module} & ::= & \I{InstalledUnitId} \verb|:| m \\

\paragraph{Indefinite versus installed units}
The purpose of an \I{ComponentId} is to uniquely identify the results of \textbf{typechecking}
an indefinite unit; whereas an \I{InstalledUnitId} uniquely identifies
the results of \textbf{compiling} a unit with all its holes
filled.  Thus, an \I{InstalledUnitId} also records a \emph{hole mapping}
which specifies how each hole was filled.  If an \I{InstalledUnitId}
is only partially filled, we may refer to it as a \I{UnitId} (as these
are never installed.)

\paragraph{Units versus packages}

Cabal packages are:

\begin{itemize}
    \item The unit of distribution
    \item The unit that Hackage handles
    \item The unit of versioning
    \item The unit of ownership (who maintains it etc) 
\end{itemize}

Backpack units are the building blocks of modular development;
there may be multiple units per a Cabal package.  While in theory
Cabal could do sophisticated things with multiple units in a
package, we expect Cabal
to pick a distinguished unit (with the same unit name $p$ as
the package) which serves as the publically visible unit.


%\newpage

\end{document} % chktex 16