1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
|
{-# LANGUAGE Trustworthy #-}
{-# LANGUAGE NoImplicitPrelude
, BangPatterns
, NondecreasingIndentation
#-}
{-# OPTIONS_GHC -funbox-strict-fields #-}
-----------------------------------------------------------------------------
-- |
-- Module : GHC.IO.Encoding.Latin1
-- Copyright : (c) The University of Glasgow, 2009
-- License : see libraries/base/LICENSE
--
-- Maintainer : libraries@haskell.org
-- Stability : internal
-- Portability : non-portable
--
-- Latin-1 (ISO 8859-1) codecs for the IO library
--
-- Portions Copyright : (c) Tom Harper 2008-2009,
-- (c) Bryan O'Sullivan 2009,
-- (c) Duncan Coutts 2009
--
-----------------------------------------------------------------------------
module GHC.IO.Encoding.Latin1 (
latin1, mkLatin1,
latin1_checked, mkLatin1_checked,
latin1_decode,
latin1_encode,
latin1_checked_encode,
) where
import GHC.Base
import GHC.Real
import GHC.Num
-- import GHC.IO
import GHC.IO.Buffer
import GHC.IO.Encoding.Failure
import GHC.IO.Encoding.Types
-- -----------------------------------------------------------------------------
-- Latin1
-- | The Latin-1 text encoding produced by 'mkLatin1' with the
-- 'ErrorOnCodingFailure' failure mode.
latin1 :: TextEncoding
latin1 = mkLatin1 ErrorOnCodingFailure
-- | Build the unchecked Latin-1 'TextEncoding' (named @\"ISO8859-1\"@).
--
-- The supplied 'CodingFailureMode' is handed to both the decoder factory
-- ('latin1_DF') and the encoder factory ('latin1_EF').
mkLatin1 :: CodingFailureMode -> TextEncoding
mkLatin1 cfm = TextEncoding
  { textEncodingName = "ISO8859-1"
  , mkTextDecoder    = latin1_DF cfm
  , mkTextEncoder    = latin1_EF cfm
  }
-- | Decoder factory for Latin-1.
--
-- The codec is stateless: its state type is @()@, 'getState' returns @()@,
-- and 'setState' and 'close' do nothing.  Recovery from invalid input is
-- delegated to 'recoverDecode' with the given failure mode.
latin1_DF :: CodingFailureMode -> IO (TextDecoder ())
latin1_DF cfm = return decoder
  where
    decoder = BufferCodec
      { encode   = latin1_decode
      , recover  = recoverDecode cfm
      , close    = return ()
      , getState = return ()
      , setState = \_ -> return ()
      }
-- | Encoder factory for the unchecked Latin-1 encoding.
--
-- The codec is stateless: its state type is @()@, 'getState' returns @()@,
-- and 'setState' and 'close' do nothing.  Recovery is delegated to
-- 'recoverEncode' with the given failure mode.
latin1_EF :: CodingFailureMode -> IO (TextEncoder ())
latin1_EF cfm = return encoder
  where
    encoder = BufferCodec
      { encode   = latin1_encode
      , recover  = recoverEncode cfm
      , close    = return ()
      , getState = return ()
      , setState = \_ -> return ()
      }
-- | The checked Latin-1 text encoding produced by 'mkLatin1_checked' with
-- the 'ErrorOnCodingFailure' failure mode.
latin1_checked :: TextEncoding
latin1_checked = mkLatin1_checked ErrorOnCodingFailure
-- | Build the checked Latin-1 'TextEncoding' (named
-- @\"ISO8859-1(checked)\"@).
--
-- Decoding uses the same decoder factory as 'mkLatin1' ('latin1_DF');
-- only the encoder differs ('latin1_checked_EF').
mkLatin1_checked :: CodingFailureMode -> TextEncoding
mkLatin1_checked cfm = TextEncoding
  { textEncodingName = "ISO8859-1(checked)"
  , mkTextDecoder    = latin1_DF cfm
  , mkTextEncoder    = latin1_checked_EF cfm
  }
-- | Encoder factory for the checked Latin-1 encoding.
--
-- Identical in shape to 'latin1_EF' except that the encode step is
-- 'latin1_checked_encode'.  The codec is stateless (state type @()@) and
-- recovery is delegated to 'recoverEncode' with the given failure mode.
latin1_checked_EF :: CodingFailureMode -> IO (TextEncoder ())
latin1_checked_EF cfm = return encoder
  where
    encoder = BufferCodec
      { encode   = latin1_checked_encode
      , recover  = recoverEncode cfm
      , close    = return ()
      , getState = return ()
      , setState = \_ -> return ()
      }
-- | Decode bytes into characters, one byte per character.
--
-- Each input byte is turned into the 'Char' with the same numeric value.
-- The loop stops with 'OutputUnderflow' when the output write index has
-- reached the output 'bufSize' (this is tested first), or with
-- 'InputUnderflow' when every input byte up to 'bufR' has been consumed.
--
-- In the returned input buffer 'bufL' is advanced past the consumed bytes;
-- if everything was consumed, both 'bufL' and 'bufR' are reset to 0.  In
-- the returned output buffer 'bufR' is advanced past the written characters.
latin1_decode :: DecodeBuffer
latin1_decode
  src@Buffer{ bufRaw = srcRaw, bufL = srcStart, bufR = srcEnd }
  dst@Buffer{ bufRaw = dstRaw, bufR = dstStart, bufSize = dstCap }
  = go srcStart dstStart
  where
    go !ri !wi
      | wi >= dstCap = finish OutputUnderflow ri wi
      | ri >= srcEnd = finish InputUnderflow ri wi
      | otherwise = do
          byte <- readWord8Buf srcRaw ri
          wi'  <- writeCharBuf dstRaw wi (unsafeChr (fromIntegral byte))
          go (ri + 1) wi'

    -- Kept as a separate function (rather than inlined into 'go') to avoid
    -- thunks being built in the inner loop.
    finish why !ri !wi = return (why, src', dst{ bufR = wi })
      where
        src' | ri == srcEnd = src{ bufL = 0, bufR = 0 }
             | otherwise    = src{ bufL = ri }
-- | Encode characters into bytes, one byte per character, with no range
-- check.
--
-- Each character's code point is converted to a byte with 'fromIntegral',
-- so a code point above 0xff is not reported as an error here (use
-- 'latin1_checked_encode' for that).
--
-- The loop stops with 'OutputUnderflow' when the output write index has
-- reached the output 'bufSize' (this is tested first), or with
-- 'InputUnderflow' when the input read index has reached 'bufR'.  The
-- returned input buffer has 'bufL' advanced (or both indices reset to 0
-- when fully consumed); the returned output buffer has 'bufR' advanced.
latin1_encode :: EncodeBuffer
latin1_encode
  src@Buffer{ bufRaw = srcRaw, bufL = srcStart, bufR = srcEnd }
  dst@Buffer{ bufRaw = dstRaw, bufR = dstStart, bufSize = dstCap }
  = go srcStart dstStart
  where
    finish why !ri !wi = return (why, src', dst{ bufR = wi })
      where
        src' | ri == srcEnd = src{ bufL = 0, bufR = 0 }
             | otherwise    = src{ bufL = ri }

    go !ri !wi
      | wi >= dstCap = finish OutputUnderflow ri wi
      | ri >= srcEnd = finish InputUnderflow ri wi
      | otherwise = do
          (ch, ri') <- readCharBuf srcRaw ri
          writeWord8Buf dstRaw wi (fromIntegral (ord ch))
          go ri' (wi + 1)
-- | Encode characters into bytes, one byte per character, rejecting any
-- character whose code point is above 0xff.
--
-- The loop stops with 'OutputUnderflow' when the output write index has
-- reached the output 'bufSize' (this is tested first), with
-- 'InputUnderflow' when the input read index has reached 'bufR', or with
-- 'InvalidSequence' at the first out-of-range character.  In the
-- 'InvalidSequence' case the offending character is not consumed: the
-- returned input buffer's 'bufL' still points at it and nothing is written
-- for it.
latin1_checked_encode :: EncodeBuffer
latin1_checked_encode
  src@Buffer{ bufRaw = srcRaw, bufL = srcStart, bufR = srcEnd }
  dst@Buffer{ bufRaw = dstRaw, bufR = dstStart, bufSize = dstCap }
  = go srcStart dstStart
  where
    finish why !ri !wi = return (why, src', dst{ bufR = wi })
      where
        src' | ri == srcEnd = src{ bufL = 0, bufR = 0 }
             | otherwise    = src{ bufL = ri }

    go !ri !wi
      | wi >= dstCap = finish OutputUnderflow ri wi
      | ri >= srcEnd = finish InputUnderflow ri wi
      | otherwise = do
          (ch, ri') <- readCharBuf srcRaw ri
          if ord ch <= 0xff
            then do
              writeWord8Buf dstRaw wi (fromIntegral (ord ch))
              go ri' (wi + 1)
            -- Report the failure at the pre-read indices so the bad
            -- character is left in the input.
            else finish InvalidSequence ri wi
|