summaryrefslogtreecommitdiff
path: root/libraries/base
diff options
context:
space:
mode:
authorMichael Orlitzky <michael@orlitzky.com>2014-10-31 09:50:00 +0100
committerHerbert Valerio Riedel <hvr@gnu.org>2014-10-31 10:46:39 +0100
commit8ef4cf161dc76963a0094bdfe6bdc623a4863787 (patch)
treee7474022972a6472f47df95921a122108e4bc5c4 /libraries/base
parent919e9303c1ce0fffc5d09b6875f17cf9b1f5167d (diff)
downloadhaskell-8ef4cf161dc76963a0094bdfe6bdc623a4863787.tar.gz
Add doctest examples for Data.Char
This adds doctest examples for every function and data type in `Data.Char`. Reviewed By: austin, hvr Differential Revision: https://phabricator.haskell.org/D371
Diffstat (limited to 'libraries/base')
-rw-r--r--libraries/base/Data/Char.hs341
1 files changed, 328 insertions, 13 deletions
diff --git a/libraries/base/Data/Char.hs b/libraries/base/Data/Char.hs
index ac708ac0ef..e4e7fbfcb8 100644
--- a/libraries/base/Data/Char.hs
+++ b/libraries/base/Data/Char.hs
@@ -62,10 +62,38 @@ import GHC.Unicode
import GHC.Num
import GHC.Enum
--- | Convert a single digit 'Char' to the corresponding 'Int'.
--- This function fails unless its argument satisfies 'isHexDigit',
--- but recognises both upper and lower-case hexadecimal digits
--- (i.e. @\'0\'@..@\'9\'@, @\'a\'@..@\'f\'@, @\'A\'@..@\'F\'@).
+-- $setup
+-- Allow the use of Prelude in doctests.
+-- >>> import Prelude
+
+-- | Convert a single digit 'Char' to the corresponding 'Int'. This
+-- function fails unless its argument satisfies 'isHexDigit', but
+-- recognises both upper- and lower-case hexadecimal digits (that
+-- is, @\'0\'@..@\'9\'@, @\'a\'@..@\'f\'@, @\'A\'@..@\'F\'@).
+--
+-- ==== __Examples__
+--
+-- Characters @\'0\'@ through @\'9\'@ are converted properly to
+-- @0..9@:
+--
+-- >>> map digitToInt ['0'..'9']
+-- [0,1,2,3,4,5,6,7,8,9]
+--
+-- Both upper- and lower-case @\'A\'@ through @\'F\'@ are converted
+-- as well, to @10..15@.
+--
+-- >>> map digitToInt ['a'..'f']
+-- [10,11,12,13,14,15]
+-- >>> map digitToInt ['A'..'F']
+-- [10,11,12,13,14,15]
+--
+-- Anything else throws an exception:
+--
+-- >>> digitToInt 'G'
+-- *** Exception: Char.digitToInt: not a digit 'G'
+-- >>> digitToInt '♥'
+-- *** Exception: Char.digitToInt: not a digit '\9829'
+--
digitToInt :: Char -> Int
digitToInt c
| (fromIntegral dec::Word) <= 9 = dec
@@ -77,9 +105,61 @@ digitToInt c
hexl = ord c - ord 'a'
hexu = ord c - ord 'A'
--- | Unicode General Categories (column 2 of the UnicodeData table)
--- in the order they are listed in the Unicode standard.
-
+-- | Unicode General Categories (column 2 of the UnicodeData table) in
+-- the order they are listed in the Unicode standard (the Unicode
+-- Character Database, in particular).
+--
+-- ==== __Examples__
+--
+-- Basic usage:
+--
+-- >>> :t OtherLetter
+-- OtherLetter :: GeneralCategory
+--
+-- 'Eq' instance:
+--
+-- >>> UppercaseLetter == UppercaseLetter
+-- True
+-- >>> UppercaseLetter == LowercaseLetter
+-- False
+--
+-- 'Ord' instance:
+--
+-- >>> NonSpacingMark <= MathSymbol
+-- True
+--
+-- 'Enum' instance:
+--
+-- >>> enumFromTo ModifierLetter SpacingCombiningMark
+-- [ModifierLetter,OtherLetter,NonSpacingMark,SpacingCombiningMark]
+--
+-- 'Read' instance:
+--
+-- >>> read "DashPunctuation" :: GeneralCategory
+-- DashPunctuation
+-- >>> read "17" :: GeneralCategory
+-- *** Exception: Prelude.read: no parse
+--
+-- 'Show' instance:
+--
+-- >>> show EnclosingMark
+-- "EnclosingMark"
+--
+-- 'Bounded' instance:
+--
+-- >>> minBound :: GeneralCategory
+-- UppercaseLetter
+-- >>> maxBound :: GeneralCategory
+-- NotAssigned
+--
+-- 'Ix' instance:
+--
+-- >>> import Data.Ix ( index )
+-- >>> index (OtherLetter,Control) FinalQuote
+-- 12
+-- >>> index (OtherLetter,Control) Format
+-- *** Exception: Error in array index
+--
data GeneralCategory
= UppercaseLetter -- ^ Lu: Letter, Uppercase
| LowercaseLetter -- ^ Ll: Letter, Lowercase
@@ -113,15 +193,79 @@ data GeneralCategory
| NotAssigned -- ^ Cn: Other, Not Assigned
deriving (Eq, Ord, Enum, Read, Show, Bounded, Ix)
--- | The Unicode general category of the character.
+-- | The Unicode general category of the character. This relies on the
+-- 'Enum' instance of 'GeneralCategory', which must remain in the
+-- same order as the categories are presented in the Unicode
+-- standard.
+--
+-- ==== __Examples__
+--
+-- Basic usage:
+--
+-- >>> generalCategory 'a'
+-- LowercaseLetter
+-- >>> generalCategory 'A'
+-- UppercaseLetter
+-- >>> generalCategory '0'
+-- DecimalNumber
+-- >>> generalCategory '%'
+-- OtherPunctuation
+-- >>> generalCategory '♥'
+-- OtherSymbol
+-- >>> generalCategory '\31'
+-- Control
+-- >>> generalCategory ' '
+-- Space
+--
generalCategory :: Char -> GeneralCategory
generalCategory c = toEnum $ fromIntegral $ wgencat $ fromIntegral $ ord c
-- derived character classifiers
-- | Selects alphabetic Unicode characters (lower-case, upper-case and
--- title-case letters, plus letters of caseless scripts and modifiers letters).
--- This function is equivalent to 'Data.Char.isAlpha'.
+-- title-case letters, plus letters of caseless scripts and
+-- modifiers letters). This function is equivalent to
+-- 'Data.Char.isAlpha'.
+--
+-- This function returns 'True' if its argument has one of the
+-- following 'GeneralCategory's, or 'False' otherwise:
+--
+-- * 'UppercaseLetter'
+-- * 'LowercaseLetter'
+-- * 'TitlecaseLetter'
+-- * 'ModifierLetter'
+-- * 'OtherLetter'
+--
+-- These classes are defined in the
+-- <http://www.unicode.org/reports/tr44/tr44-14.html#GC_Values_Table Unicode Character Database>,
+-- part of the Unicode standard. The same document defines what is
+-- and is not a \"Letter\".
+--
+-- ==== __Examples__
+--
+-- Basic usage:
+--
+-- >>> isLetter 'a'
+-- True
+-- >>> isLetter 'A'
+-- True
+-- >>> isLetter '0'
+-- False
+-- >>> isLetter '%'
+-- False
+-- >>> isLetter '♥'
+-- False
+-- >>> isLetter '\31'
+-- False
+--
+-- Ensure that 'isLetter' and 'isAlpha' are equivalent.
+--
+-- >>> let chars = [(chr 0)..]
+-- >>> let letters = map isLetter chars
+-- >>> let alphas = map isAlpha chars
+-- >>> letters == alphas
+-- True
+--
isLetter :: Char -> Bool
isLetter c = case generalCategory c of
UppercaseLetter -> True
@@ -131,8 +275,41 @@ isLetter c = case generalCategory c of
OtherLetter -> True
_ -> False
--- | Selects Unicode mark characters, e.g. accents and the like, which
--- combine with preceding letters.
+-- | Selects Unicode mark characters, for example accents and the
+-- like, which combine with preceding characters.
+--
+-- This function returns 'True' if its argument has one of the
+-- following 'GeneralCategory's, or 'False' otherwise:
+--
+-- * 'NonSpacingMark'
+-- * 'SpacingCombiningMark'
+-- * 'EnclosingMark'
+--
+-- These classes are defined in the
+-- <http://www.unicode.org/reports/tr44/tr44-14.html#GC_Values_Table Unicode Character Database>,
+-- part of the Unicode standard. The same document defines what is
+-- and is not a \"Mark\".
+--
+-- ==== __Examples__
+--
+-- Basic usage:
+--
+-- >>> isMark 'a'
+-- False
+-- >>> isMark '0'
+-- False
+--
+-- Combining marks such as accent characters usually need to follow
+-- another character before they become printable:
+--
+-- >>> map isMark "ò"
+-- [False,True]
+--
+-- Puns are not necessarily supported:
+--
+-- >>> isMark '✓'
+-- False
+--
isMark :: Char -> Bool
isMark c = case generalCategory c of
NonSpacingMark -> True
@@ -141,7 +318,41 @@ isMark c = case generalCategory c of
_ -> False
-- | Selects Unicode numeric characters, including digits from various
--- scripts, Roman numerals, etc.
+-- scripts, Roman numerals, et cetera.
+--
+-- This function returns 'True' if its argument has one of the
+-- following 'GeneralCategory's, or 'False' otherwise:
+--
+-- * 'DecimalNumber'
+-- * 'LetterNumber'
+-- * 'OtherNumber'
+--
+-- These classes are defined in the
+-- <http://www.unicode.org/reports/tr44/tr44-14.html#GC_Values_Table Unicode Character Database>,
+-- part of the Unicode standard. The same document defines what is
+-- and is not a \"Number\".
+--
+-- ==== __Examples__
+--
+-- Basic usage:
+--
+-- >>> isNumber 'a'
+-- False
+-- >>> isNumber '%'
+-- False
+-- >>> isNumber '3'
+-- True
+--
+-- ASCII @\'0\'@ through @\'9\'@ are all numbers:
+--
+-- >>> and $ map isNumber ['0'..'9']
+-- True
+--
+-- Unicode Roman numerals are \"numbers\" as well:
+--
+-- >>> isNumber 'Ⅸ'
+-- True
+--
isNumber :: Char -> Bool
isNumber c = case generalCategory c of
DecimalNumber -> True
@@ -151,6 +362,40 @@ isNumber c = case generalCategory c of
-- | Selects Unicode punctuation characters, including various kinds
-- of connectors, brackets and quotes.
+--
+-- This function returns 'True' if its argument has one of the
+-- following 'GeneralCategory's, or 'False' otherwise:
+--
+-- * 'ConnectorPunctuation'
+-- * 'DashPunctuation'
+-- * 'OpenPunctuation'
+-- * 'ClosePunctuation'
+-- * 'InitialQuote'
+-- * 'FinalQuote'
+-- * 'OtherPunctuation'
+--
+-- These classes are defined in the
+-- <http://www.unicode.org/reports/tr44/tr44-14.html#GC_Values_Table Unicode Character Database>,
+-- part of the Unicode standard. The same document defines what is
+-- and is not a \"Punctuation\".
+--
+-- ==== __Examples__
+--
+-- Basic usage:
+--
+-- >>> isPunctuation 'a'
+-- False
+-- >>> isPunctuation '7'
+-- False
+-- >>> isPunctuation '♥'
+-- False
+-- >>> isPunctuation '"'
+-- True
+-- >>> isPunctuation '?'
+-- True
+-- >>> isPunctuation '—'
+-- True
+--
isPunctuation :: Char -> Bool
isPunctuation c = case generalCategory c of
ConnectorPunctuation -> True
@@ -164,6 +409,39 @@ isPunctuation c = case generalCategory c of
-- | Selects Unicode symbol characters, including mathematical and
-- currency symbols.
+--
+-- This function returns 'True' if its argument has one of the
+-- following 'GeneralCategory's, or 'False' otherwise:
+--
+-- * 'MathSymbol'
+-- * 'CurrencySymbol'
+-- * 'ModifierSymbol'
+-- * 'OtherSymbol'
+--
+-- These classes are defined in the
+-- <http://www.unicode.org/reports/tr44/tr44-14.html#GC_Values_Table Unicode Character Database>,
+-- part of the Unicode standard. The same document defines what is
+-- and is not a \"Symbol\".
+--
+-- ==== __Examples__
+--
+-- Basic usage:
+--
+-- >>> isSymbol 'a'
+-- False
+-- >>> isSymbol '6'
+-- False
+-- >>> isSymbol '='
+-- True
+--
+-- The definition of \"math symbol\" may be a little
+-- counter-intuitive depending on one's background:
+--
+-- >>> isSymbol '+'
+-- True
+-- >>> isSymbol '-'
+-- False
+--
isSymbol :: Char -> Bool
isSymbol c = case generalCategory c of
MathSymbol -> True
@@ -173,6 +451,43 @@ isSymbol c = case generalCategory c of
_ -> False
-- | Selects Unicode space and separator characters.
+--
+-- This function returns 'True' if its argument has one of the
+-- following 'GeneralCategory's, or 'False' otherwise:
+--
+-- * 'Space'
+-- * 'LineSeparator'
+-- * 'ParagraphSeparator'
+--
+-- These classes are defined in the
+-- <http://www.unicode.org/reports/tr44/tr44-14.html#GC_Values_Table Unicode Character Database>,
+-- part of the Unicode standard. The same document defines what is
+-- and is not a \"Separator\".
+--
+-- ==== __Examples__
+--
+-- Basic usage:
+--
+-- >>> isSeparator 'a'
+-- False
+-- >>> isSeparator '6'
+-- False
+-- >>> isSeparator ' '
+-- True
+--
+-- Warning: newlines and tab characters are not considered
+-- separators.
+--
+-- >>> isSeparator '\n'
+-- False
+-- >>> isSeparator '\t'
+-- False
+--
+-- But some more exotic characters are (like HTML's @&nbsp;@):
+--
+-- >>> isSeparator '\160'
+-- True
+--
isSeparator :: Char -> Bool
isSeparator c = case generalCategory c of
Space -> True