/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
/*
* Copyright (C) 1999-2008 Novell, Inc. (www.novell.com)
*
* This library is free software: you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this library. If not, see <http://www.gnu.org/licenses/>.
*
* Authors: Michael Zucchi
*/
/* What should hopefully be a fast mail parser */
/* Do not change this code without asking me (Michael Zucchi) first
*
* There is almost always a reason something was done a certain way.
*/
#include
#include
#include
#include
#include
#include
#include "camel-mempool.h"
#include "camel-mime-filter.h"
#include "camel-mime-parser.h"
#include "camel-mime-utils.h"
#include "camel-stream.h"
/* Debug trace wrappers.  All four expand to nothing, so any statement
 * wrapped in r(), h(), c() or d() is compiled out. */
#define r(x)
#define h(x)
#define c(x)
#define d(x)
/* Build-time switches; the two below are commented out, i.e. disabled. */
/*#define PRESERVE_HEADERS*/
/*#define PURIFY*/
/* Enables the #ifdef MEMPOOL sections: headers are allocated from a
 * per-part CamelMemPool. */
#define MEMPOOL
#ifdef PURIFY
/* ids returned by purify_watch()/purify_watch_n() for the input buffer */
gint inend_id = -1,
inbuffer_id = -1;
#endif
#define SCAN_BUF 4096 /* size of read buffer */
#define SCAN_HEAD 128 /* headroom guaranteed to be before each read buffer */
/* A little hacky: the scanner state struct doubles as the object's private
 * data, so its tag is aliased rather than renaming everything. */
#define _header_scan_state _CamelMimeParserPrivate
/* Fetch the scanner state from a CamelMimeParser (or a pointer cast to one). */
#define _PRIVATE(obj) (((CamelMimeParser *)(obj))->priv)
/* Complete scanner state.  This is also the parser's private data: the
 * struct tag is #defined to _CamelMimeParserPrivate. */
struct _header_scan_state {
/* global state */
camel_mime_parser_state_t state;
/* for building headers during scanning */
gchar *outbuf; /* start of the header assembly buffer */
gchar *outptr; /* current write position inside outbuf */
gchar *outend; /* end of the memory allocated for outbuf */
gint fd; /* input for a fd input */
CamelStream *stream; /* or for a stream */
GInputStream *input_stream; /* or for a GInputStream */
gint ioerrno; /* io error state */
/* for scanning input buffers */
gchar *realbuf; /* the real buffer, SCAN_HEAD *2 + SCAN_BUF bytes */
gchar *inbuf; /* points to a subset of the allocated memory; the underflow area */
gchar *inptr; /* current read position (the underflow, up to SCAN_HEAD, is for use by filters so they don't copy all data) */
gchar *inend; /* end of valid data; folder_read() stores a '\n' sentinel here */
gint atleast; /* folder_read() refills when no more than this many bytes remain unread */
goffset seek; /* current offset to start of buffer */
gint unstep; /* how many states to 'unstep' (repeat the current state) */
guint midline:1; /* are we mid-line interrupted? */
guint scan_from:1; /* do we care about From lines? */
guint scan_pre_from:1; /* do we return pre-from data? */
guint eof:1; /* reached eof? */
goffset start_of_from; /* where from started */
goffset start_of_boundary; /* where the last boundary started */
goffset start_of_headers; /* where headers started from the last scan */
goffset header_start; /* start of last header, or -1 */
/* filters to apply to all content before output */
gint filterid; /* id of next filter */
struct _header_scan_filter *filters; /* singly linked list, in the order added */
/* per message/part info */
struct _header_scan_stack *parts; /* innermost part first; linked through ->parent */
};
/* One entry of the part stack; entries are linked from the innermost part
 * outwards through 'parent'. */
struct _header_scan_stack {
struct _header_scan_stack *parent;
camel_mime_parser_state_t savestate; /* state at invocation of this part */
#ifdef MEMPOOL
CamelMemPool *pool; /* memory pool to keep track of headers/etc at this level */
#endif
struct _camel_header_raw *headers; /* headers for this part */
CamelContentType *content_type;
/* GStrings are not used because a buffer cannot be appended to them efficiently */
GByteArray *pretext; /* for multipart types, save the pre-boundary data here */
GByteArray *posttext; /* for multipart types, save the post-boundary data here */
gint prestage; /* used to determine if it is a pre-boundary or post-boundary data segment */
GByteArray *from_line; /* the from line */
gchar *boundary; /* for multipart/ * boundaries, including leading -- and trailing -- for the final part */
gint boundarylen; /* actual length of boundary, including leading -- if there is one */
gint boundarylenfinal; /* length of boundary, including trailing -- if there is one */
gint atleast; /* the biggest boundary from here to the parent */
};
/* Node of the content filter list.  'next' must remain the first member:
 * the list code casts the address of the list head to a node pointer. */
struct _header_scan_filter {
struct _header_scan_filter *next;
gint id; /* value handed back by camel_mime_parser_filter_add() */
CamelMimeFilter *filter; /* the list holds a reference on this */
};
/* Forward declarations of the scanner implementation, so the public API
 * wrappers can be defined before it. */
static void folder_scan_reset (struct _header_scan_state *s);
static void folder_scan_step (struct _header_scan_state *s, gchar **databuffer, gsize *datalength);
static void folder_scan_drop_step (struct _header_scan_state *s);
static gint folder_scan_init_with_fd (struct _header_scan_state *s, gint fd);
static gint folder_scan_init_with_stream (struct _header_scan_state *s, CamelStream *stream, GError **error);
static struct _header_scan_state *folder_scan_init (void);
static void folder_scan_close (struct _header_scan_state *s);
static struct _header_scan_stack *folder_scan_content (struct _header_scan_state *s, gint *lastone, gchar **data, gsize *length);
static struct _header_scan_stack *folder_scan_header (struct _header_scan_state *s, gint *lastone);
static gint folder_scan_skip_line (struct _header_scan_state *s, GByteArray *save);
static goffset folder_seek (struct _header_scan_state *s, goffset offset, gint whence);
static goffset folder_tell (struct _header_scan_state *s);
static gint folder_read (struct _header_scan_state *s);
static void folder_push_part (struct _header_scan_state *s, struct _header_scan_stack *h);
#ifdef MEMPOOL
/* only present in the mempool build */
static void header_append_mempool (struct _header_scan_state *s, struct _header_scan_stack *h, gchar *header, gint offset);
#endif
/* Printable state names, indexed by parser state, used only by the d()
 * debug traces.  "d(!)0" expands to "0" while d() is defined empty and to
 * "!0" once d() passes its argument through, so this table is compiled
 * only when that debug output is enabled. */
#if d(!)0
static gchar *states[] = {
	"CAMEL_MIME_PARSER_STATE_INITIAL",
	"CAMEL_MIME_PARSER_STATE_PRE_FROM",	/* pre-from data */
	"CAMEL_MIME_PARSER_STATE_FROM",		/* got 'From' line */
	"CAMEL_MIME_PARSER_STATE_HEADER",	/* toplevel header */
	"CAMEL_MIME_PARSER_STATE_BODY",		/* scanning body of message */
	"CAMEL_MIME_PARSER_STATE_MULTIPART",	/* got multipart header */
	"CAMEL_MIME_PARSER_STATE_MESSAGE",	/* rfc822/news message */
	"CAMEL_MIME_PARSER_STATE_PART",		/* part of a multipart */
	"CAMEL_MIME_PARSER_STATE_EOF",		/* end of file */
	"CAMEL_MIME_PARSER_STATE_PRE_FROM_END",
	"CAMEL_MIME_PARSER_STATE_FROM_END",
	"CAMEL_MIME_PARSER_STATE_HEADER_END",	/* was misspelt "HEAER_END" */
	"CAMEL_MIME_PARSER_STATE_BODY_END",
	"CAMEL_MIME_PARSER_STATE_MULTIPART_END",
	"CAMEL_MIME_PARSER_STATE_MESSAGE_END",
};
#endif
/* Register CamelMimeParser as a GObject subclass; this also supplies the
 * camel_mime_parser_parent_class pointer used when chaining up in finalize. */
G_DEFINE_TYPE (CamelMimeParser, camel_mime_parser, G_TYPE_OBJECT)
/* GObject finalize handler: closes the scanner held in the instance's
 * private pointer, then hands over to the parent class. */
static void
mime_parser_finalize (GObject *object)
{
#ifdef PURIFY
	purify_watch_remove_all ();
#endif

	folder_scan_close (_PRIVATE (object));

	/* Chain up so the parent class can finish tearing the object down. */
	G_OBJECT_CLASS (camel_mime_parser_parent_class)->finalize (object);
}
/* Class initialiser: the only vfunc overridden is GObject::finalize. */
static void
camel_mime_parser_class_init (CamelMimeParserClass *class)
{
	G_OBJECT_CLASS (class)->finalize = mime_parser_finalize;
}
/* Instance initialiser: gives the new parser a freshly created scanner
 * state as its private data. */
static void
camel_mime_parser_init (CamelMimeParser *parser)
{
	struct _header_scan_state *state = folder_scan_init ();

	parser->priv = state;
}
/**
 * camel_mime_parser_new:
 *
 * Instantiate a #CamelMimeParser with no construction properties.
 *
 * Returns: A new #CamelMimeParser object.
 **/
CamelMimeParser *
camel_mime_parser_new (void)
{
	CamelMimeParser *parser;

	parser = g_object_new (CAMEL_TYPE_MIME_PARSER, NULL);

	return parser;
}
/**
 * camel_mime_parser_filter_add:
 * @m: a #CamelMimeParser
 * @mf: the #CamelMimeFilter to append; the parser takes its own reference
 *
 * Append a filter to the end of the parser's filter list.  Filters are
 * applied to body content before it is handed to the caller, and may be
 * pipelined for multi-pass processing; they run in the order in which they
 * were added.
 *
 * Filters only ever see body content: once one has been set, everything
 * returned by a step with a state of CAMEL_MIME_PARSER_STATE_BODY has been
 * through it.
 *
 * Returns: An id that may be passed to camel_mime_parser_filter_remove().
 * The id counter steps over -1, so -1 is never handed out by a later call.
 *
 * Since: 2.22
 **/
gint
camel_mime_parser_filter_add (CamelMimeParser *m,
                              CamelMimeFilter *mf)
{
	struct _header_scan_state *s = _PRIVATE (m);
	struct _header_scan_filter *node;
	struct _header_scan_filter **tail;

	node = g_malloc (sizeof (*node));
	node->filter = mf;
	node->id = s->filterid++;
	/* never leave -1 as the next id to be issued */
	if (s->filterid == -1)
		s->filterid++;
	node->next = NULL;
	g_object_ref (G_OBJECT (mf));

	/* walk to the link that terminates the list and hook the node in */
	for (tail = &s->filters; *tail != NULL; tail = &(*tail)->next)
		;
	*tail = node;

	return node->id;
}
/**
 * camel_mime_parser_filter_remove:
 * @m: a #CamelMimeParser
 * @id: an id previously returned by camel_mime_parser_filter_add()
 *
 * Remove a processing filter from the pipeline and drop the reference the
 * parser held on it.  There is no restriction on the order in which
 * filters can be removed.  An unknown @id is silently ignored.
 *
 * Since: 2.22
 **/
void
camel_mime_parser_filter_remove (CamelMimeParser *m,
                                 gint id)
{
	struct _header_scan_state *s = _PRIVATE (m);
	struct _header_scan_filter **link = &s->filters;

	/* There should only be a single matching id, but the whole list is
	 * scanned anyway.  'link' only advances when nothing was unlinked, so
	 * the node that slides into the removed node's place is examined too
	 * (the previous loop stepped over it). */
	while (*link != NULL) {
		struct _header_scan_filter *node = *link;

		if (node->id == id) {
			*link = node->next;
			g_object_unref (node->filter);
			g_free (node);
		} else {
			link = &node->next;
		}
	}
}
/**
 * camel_mime_parser_header:
 * @m: a #CamelMimeParser
 * @name: Name of header.
 * @offset: Pointer that can receive the offset of the header in
 * the stream from the start of parsing; handed on to
 * camel_header_raw_find() untouched.
 *
 * Look a header up by name among the headers of the current part.
 *
 * Returns: The header value, or NULL if the header is not defined or
 * there is no current part with headers.
 **/
const gchar *
camel_mime_parser_header (CamelMimeParser *m,
                          const gchar *name,
                          gint *offset)
{
	struct _header_scan_state *s = _PRIVATE (m);
	struct _header_scan_stack *part = s->parts;

	if (part == NULL || part->headers == NULL)
		return NULL;

	return camel_header_raw_find (&part->headers, name, offset);
}
/**
 * camel_mime_parser_headers_raw:
 * @m: a #CamelMimeParser
 *
 * Get the list of raw headers defined for the current state of the
 * parser.  The list stays valid until the next call to
 * camel_mime_parser_step() or camel_mime_parser_drop_step().
 *
 * Returns: The raw headers, or NULL if there are no headers
 * defined for the current part or state.  These are READ ONLY.
 *
 * Since: 2.22
 **/
struct _camel_header_raw *
camel_mime_parser_headers_raw (CamelMimeParser *m)
{
	struct _header_scan_stack *part = _PRIVATE (m)->parts;

	return part != NULL ? part->headers : NULL;
}
/* View a byte array as a C string.  A NUL byte is appended to the array
 * (modifying it) unless its last byte is already NUL; a NULL array yields
 * NULL.  The result points into the array's own storage. */
static const gchar *
byte_array_to_string (GByteArray *array)
{
	gboolean terminated;

	if (array == NULL)
		return NULL;

	terminated = array->len != 0 && array->data[array->len - 1] == '\0';
	if (!terminated)
		g_byte_array_append (array, (guint8 *) "", 1);

	return (const gchar *) array->data;
}
/**
 * camel_mime_parser_preface:
 * @m: a #CamelMimeParser
 *
 * Retrieve the preface text (the data saved before the first boundary)
 * of the current multipart.  Can only be used when the state is
 * CAMEL_MIME_PARSER_STATE_MULTIPART_END.
 *
 * Returns: The preface text, or NULL if there wasn't any.
 *
 * Since: 2.22
 **/
const gchar *
camel_mime_parser_preface (CamelMimeParser *m)
{
	struct _header_scan_stack *part = _PRIVATE (m)->parts;

	return part != NULL ? byte_array_to_string (part->pretext) : NULL;
}
/**
 * camel_mime_parser_postface:
 * @m: a #CamelMimeParser
 *
 * Retrieve the postface text (the data saved after the final boundary)
 * of the current multipart.  Only returns valid data when the current
 * state is CAMEL_MIME_PARSER_STATE_MULTIPART_END.
 *
 * Returns: The postface text, or NULL if there wasn't any.
 *
 * Since: 2.22
 **/
const gchar *
camel_mime_parser_postface (CamelMimeParser *m)
{
	struct _header_scan_stack *part = _PRIVATE (m)->parts;

	return part != NULL ? byte_array_to_string (part->posttext) : NULL;
}
/**
 * camel_mime_parser_from_line:
 * @m: a #CamelMimeParser
 *
 * Get the most recently scanned "From " line.  This should only be
 * called in the CAMEL_MIME_PARSER_STATE_FROM state.  The line includes
 * the closing \n if one was found.
 *
 * The return value remains valid while in the
 * CAMEL_MIME_PARSER_STATE_FROM state, or any deeper state.
 *
 * Returns: The From line, or NULL if called out of context.
 *
 * Since: 2.22
 **/
const gchar *
camel_mime_parser_from_line (CamelMimeParser *m)
{
	struct _header_scan_stack *part = _PRIVATE (m)->parts;

	return part != NULL ? byte_array_to_string (part->from_line) : NULL;
}
/**
 * camel_mime_parser_init_with_fd:
 * @m: a #CamelMimeParser
 * @fd: A valid file descriptor.
 *
 * Initialise the scanner with a file descriptor.  The scanner's offsets
 * will be relative to the current file position of the descriptor, so
 * seekable descriptors should be repositioned through the parser's seek
 * function.
 *
 * Returns: -1 on error.
 **/
gint
camel_mime_parser_init_with_fd (CamelMimeParser *m,
                                gint fd)
{
	return folder_scan_init_with_fd (_PRIVATE (m), fd);
}
/**
 * camel_mime_parser_init_with_stream:
 * @parser: a #CamelMimeParser
 * @stream: the #CamelStream to read from
 * @error: return location for a #GError, or %NULL
 *
 * Initialise the scanner with a source stream.  The scanner's offsets
 * will be relative to the current position of the stream, so seekable
 * streams should only be repositioned through the parser's seek function.
 *
 * Returns: -1 on error.
 **/
gint
camel_mime_parser_init_with_stream (CamelMimeParser *parser,
                                    CamelStream *stream,
                                    GError **error)
{
	return folder_scan_init_with_stream (_PRIVATE (parser), stream, error);
}
/**
 * camel_mime_parser_init_with_input_stream:
 * @parser: a #CamelMimeParser
 * @input_stream: a #GInputStream
 *
 * Reset the scanner and make it read from @input_stream, on which the
 * parser takes its own reference.  The scanner's offsets will be relative
 * to the current position of the stream, so seekable streams should only
 * be repositioned through the parser's seek function.
 *
 * Since: 3.12
 **/
void
camel_mime_parser_init_with_input_stream (CamelMimeParser *parser,
                                          GInputStream *input_stream)
{
	struct _header_scan_state *state = _PRIVATE (parser);

	/* the reset must come first; the new source is installed afterwards */
	folder_scan_reset (state);

	state->input_stream = g_object_ref (input_stream);
}
/**
 * camel_mime_parser_init_with_bytes:
 * @parser: a #CamelMimeParser
 * @bytes: a #GBytes containing the message content
 *
 * Convenience wrapper: builds a #GMemoryInputStream over @bytes and passes
 * it to camel_mime_parser_init_with_input_stream().
 *
 * Since: 3.12
 **/
void
camel_mime_parser_init_with_bytes (CamelMimeParser *parser,
                                   GBytes *bytes)
{
	GInputStream *memory_stream;

	g_return_if_fail (CAMEL_IS_MIME_PARSER (parser));
	g_return_if_fail (bytes != NULL);

	memory_stream = g_memory_input_stream_new_from_bytes (bytes);
	camel_mime_parser_init_with_input_stream (parser, memory_stream);

	/* the parser took its own reference; release the local one */
	g_object_unref (memory_stream);
}
/**
 * camel_mime_parser_scan_from:
 * @parser: MIME parser object
 * @scan_from: %TRUE if the scanner should scan From lines.
 *
 * Tell the scanner if it should scan "^From " lines or not.
 *
 * If the scanner is scanning From lines, two additional states,
 * CAMEL_MIME_PARSER_STATE_FROM and CAMEL_MIME_PARSER_STATE_FROM_END, will
 * be returned to the caller during parsing.
 *
 * These may also be preceded by an optional
 * CAMEL_MIME_PARSER_STATE_PRE_FROM state which contains the scanned data
 * found before the From line is encountered.  See also
 * camel_mime_parser_scan_pre_from().
 **/
void
camel_mime_parser_scan_from (CamelMimeParser *parser,
                             gboolean scan_from)
{
	struct _header_scan_state *s = _PRIVATE (parser);

	/* The flag is a 1-bit field: normalise first, otherwise a truthy
	 * value with a clear low bit (e.g. 2) would be stored as 0. */
	s->scan_from = scan_from ? 1 : 0;
}
/**
 * camel_mime_parser_scan_pre_from:
 * @parser: MIME parser object
 * @scan_pre_from: %TRUE if we want to get pre-from data.
 *
 * Tell the scanner whether we want to know about the pre-from
 * data during a scan.  If we do, then we may get an additional
 * state CAMEL_MIME_PARSER_STATE_PRE_FROM which returns the specified data.
 **/
void
camel_mime_parser_scan_pre_from (CamelMimeParser *parser,
                                 gboolean scan_pre_from)
{
	struct _header_scan_state *s = _PRIVATE (parser);

	/* The flag is a 1-bit field: normalise first, otherwise a truthy
	 * value with a clear low bit (e.g. 2) would be stored as 0. */
	s->scan_pre_from = scan_pre_from ? 1 : 0;
}
/**
 * camel_mime_parser_content_type:
 * @parser: MIME parser object
 *
 * Get the content type recorded for the current part.
 *
 * Returns: A content_type structure, or NULL if there is no
 * content-type defined for this part or state of the parser.
 **/
CamelContentType *
camel_mime_parser_content_type (CamelMimeParser *parser)
{
	struct _header_scan_stack *part = _PRIVATE (parser)->parts;

	/* FIXME: should this search up until it's found the 'right'
	 * content-type?  can it? */
	return part != NULL ? part->content_type : NULL;
}
/**
 * camel_mime_parser_unstep:
 * @parser: MIME parser object
 *
 * Cause the last step operation to repeat itself.  Each call adds one
 * more repetition, so calling this several times makes the same step be
 * reported that many times.
 *
 * Note that it is not possible to scan back using this function;
 * it only provides a way of peeking at the next state.
 **/
void
camel_mime_parser_unstep (CamelMimeParser *parser)
{
	_PRIVATE (parser)->unstep++;
}
/**
 * camel_mime_parser_drop_step:
 * @parser: MIME parser object
 *
 * Drop the last step call, clearing any pending unstep count as well.
 * This should only be used in conjunction with seeking of the stream,
 * as the stream may be in an undefined state relative to the state of
 * the parser.
 *
 * Use this call with care.
 **/
void
camel_mime_parser_drop_step (CamelMimeParser *parser)
{
	struct _header_scan_state *state = _PRIVATE (parser);

	state->unstep = 0;
	folder_scan_drop_step (state);
}
/**
 * camel_mime_parser_step:
 * @parser: MIME parser object
 * @databuffer: Pointer to accept a pointer to the data
 * associated with this step (if any).  May be %NULL,
 * in which case @datalength is also ignored.
 * @datalength: Pointer to accept the data length associated
 * with this step (if any).  May be %NULL.
 *
 * Parse the next part of the MIME message.  If
 * camel_mime_parser_unstep() has been called, then continue to return the
 * same state for that many calls; no scanning happens on those calls and
 * the output pointers are left untouched.
 *
 * If the step is CAMEL_MIME_PARSER_STATE_BODY then the databuffer and
 * datalength pointers will be set up to point to the internal data buffer
 * of the scanner and may be processed as required.  Any filters will have
 * already been applied to this data.
 *
 * Refer to the state diagram elsewhere for a full listing of
 * the states an application is guaranteed to get from the scanner.
 *
 * Returns: The current new state of the parser.
 **/
camel_mime_parser_state_t
camel_mime_parser_step (CamelMimeParser *parser,
                        gchar **databuffer,
                        gsize *datalength)
{
	struct _header_scan_state *s = _PRIVATE (parser);

	d (printf ("OLD STATE: '%s' :\n", states[s->state]));

	if (s->unstep <= 0) {
		gchar *dummy;
		gsize dummylength;

		if (databuffer == NULL) {
			/* caller wants neither output; @datalength is ignored too */
			databuffer = &dummy;
			datalength = &dummylength;
		} else if (datalength == NULL) {
			/* never hand a NULL length pointer to the scanner */
			datalength = &dummylength;
		}

		folder_scan_step (s, databuffer, datalength);
	} else
		s->unstep--;

	d (printf ("NEW STATE: '%s' :\n", states[s->state]));

	return s->state;
}
/**
 * camel_mime_parser_read:
 * @parser: MIME parser object
 * @databuffer: receives the address of the data inside the parser's
 * internal buffer
 * @len: maximum number of bytes wanted
 * @error: return location for a #GError, or %NULL
 *
 * Read at most @len bytes from the internal mime parser buffer.
 *
 * The address of the internal buffer is returned in @databuffer,
 * and the length of useful data is the return value.  A @len of 0
 * returns 0 without touching @databuffer.
 *
 * @len may be specified as %G_MAXSSIZE, in which case you will
 * get the full remainder of the buffer at each call.
 *
 * Note that no parsing of the data read through this function
 * occurs, so no state changes occur, but the seek position
 * is updated appropriately.
 *
 * Returns: The number of bytes available, or -1 on error.
 **/
gssize
camel_mime_parser_read (CamelMimeParser *parser,
                        const gchar **databuffer,
                        gssize len,
                        GError **error)
{
	struct _header_scan_state *s = _PRIVATE (parser);
	gintptr there;

	if (len == 0)
		return 0;

	d (printf ("parser::read() reading %d bytes\n", len));

	there = MIN (s->inend - s->inptr, len);
	d (printf ("parser::read() there = %d bytes\n", there));

	if (there <= 0) {
		/* nothing buffered: refill, then look again */
		if (folder_read (s) == -1) {
			gint err = camel_mime_parser_errno (parser);

			g_set_error (
				error, G_IO_ERROR,
				g_io_error_from_errno (err),
				"%s", g_strerror (err));
			return -1;
		}

		there = MIN (s->inend - s->inptr, len);
		d (printf ("parser::read() had to re-read, now there = %d bytes\n", there));
	}

	/* hand out the data in place and step over it */
	*databuffer = s->inptr;
	s->inptr += there;

	return there;
}
/**
 * camel_mime_parser_tell:
 * @parser: MIME parser object
 *
 * Return the current scanning offset.  The meaning of this
 * value will depend on the current state of the parser.
 *
 * An incomplete listing of the states:
 *
 * CAMEL_MIME_PARSER_STATE_INITIAL, The start of the current message.
 * CAMEL_MIME_PARSER_STATE_HEADER, CAMEL_MIME_PARSER_STATE_MESSAGE,
 * CAMEL_MIME_PARSER_STATE_MULTIPART, the character position immediately
 * after the end of the header.
 * CAMEL_MIME_PARSER_STATE_BODY, Position within the message of the start
 * of the current data block.
 * CAMEL_MIME_PARSER_STATE_*_END, The position of the character starting
 * the next section of the scan (the last position + 1 of
 * the respective current state).
 *
 * Returns: See above.
 *
 * Since: 2.22
 **/
goffset
camel_mime_parser_tell (CamelMimeParser *parser)
{
	return folder_tell (_PRIVATE (parser));
}
/**
 * camel_mime_parser_tell_start_headers:
 * @parser: MIME parser object
 *
 * Report the position within the file at which the headers started;
 * the parser caches this value at the time it scans them.
 *
 * Returns: The header start position, or -1 if
 * no headers were scanned in the current state.
 *
 * Since: 2.22
 **/
goffset
camel_mime_parser_tell_start_headers (CamelMimeParser *parser)
{
	return _PRIVATE (parser)->start_of_headers;
}
/**
 * camel_mime_parser_tell_start_from:
 * @parser: MIME parser object
 *
 * If the parser is scanning From lines, report the position of
 * the start of the From line.
 *
 * Returns: The start of the from line, or -1 if there
 * was no From line, or From lines are not being scanned.
 *
 * Since: 2.22
 **/
goffset
camel_mime_parser_tell_start_from (CamelMimeParser *parser)
{
	return _PRIVATE (parser)->start_of_from;
}
/**
 * camel_mime_parser_tell_start_boundary:
 * @parser: MIME parser object
 *
 * When parsing a multipart, report the start of the last boundary.
 *
 * Returns: The start of the boundary, or -1 if there
 * was no boundary encountered yet.
 *
 * Since: 2.22
 **/
goffset
camel_mime_parser_tell_start_boundary (CamelMimeParser *parser)
{
	return _PRIVATE (parser)->start_of_boundary;
}
/**
 * camel_mime_parser_seek:
 * @parser: MIME parser object
 * @offset: Number of bytes to offset the seek by.
 * @whence: SEEK_SET, SEEK_CUR, SEEK_END
 *
 * Reset the source position to a known value.
 *
 * Note that if the source stream/descriptor was not
 * positioned at 0 to begin with, and an absolute seek
 * is specified (whence != SEEK_CUR), then the seek
 * position may not match the desired seek position.
 *
 * Returns: The new seek offset, or -1 on
 * an error (for example, trying to seek on a non-seekable
 * stream or file descriptor).
 *
 * Since: 2.22
 **/
goffset
camel_mime_parser_seek (CamelMimeParser *parser,
                        goffset offset,
                        gint whence)
{
	return folder_seek (_PRIVATE (parser), offset, whence);
}
/**
 * camel_mime_parser_state:
 * @parser: MIME parser object
 *
 * Query the state the parser is currently in, without advancing it.
 *
 * Returns: The current parser state.
 **/
camel_mime_parser_state_t
camel_mime_parser_state (CamelMimeParser *parser)
{
	return _PRIVATE (parser)->state;
}
/**
 * camel_mime_parser_push_state:
 * @mp: MIME parser object
 * @newstate: New state
 * @boundary: Boundary marker for state; must not be %NULL.
 *
 * Pre-load a new parser state: a part carrying "--@boundary--" as its
 * boundary is pushed onto the part stack and the parser is switched to
 * @newstate.  Used to post-parse multipart content without headers.
 **/
void
camel_mime_parser_push_state (CamelMimeParser *mp,
                              camel_mime_parser_state_t newstate,
                              const gchar *boundary)
{
	struct _header_scan_state *state = _PRIVATE (mp);
	struct _header_scan_stack *part;
	gsize alloc_size;

	part = g_malloc0 (sizeof (*part));

	/* "--" + boundary for an ordinary delimiter ... */
	part->boundarylen = strlen (boundary) + 2;
	/* ... plus the trailing "--" of the closing delimiter */
	part->boundarylenfinal = part->boundarylen + 2;

	/* room for the closing form and its terminating NUL */
	alloc_size = part->boundarylen + 3;
	part->boundary = g_malloc (alloc_size);
	g_snprintf (part->boundary, alloc_size, "--%s--", boundary);

	folder_push_part (state, part);
	state->state = newstate;
}
/**
 * camel_mime_parser_stream:
 * @parser: MIME parser object
 *
 * Get the stream, if any, the parser has been initialised
 * with.  May be used to set up sub-streams, but should not
 * be read from directly (without saving and restoring
 * the seek position in between).
 *
 * Returns: The stream from camel_mime_parser_init_with_stream(), or NULL
 * if the parser is reading from a file descriptor or is uninitialised.
 **/
CamelStream *
camel_mime_parser_stream (CamelMimeParser *parser)
{
	return _PRIVATE (parser)->stream;
}
/* Report the errno value the parser recorded, in case any I/O error
 * occurred during processing. */
gint
camel_mime_parser_errno (CamelMimeParser *parser)
{
	return _PRIVATE (parser)->ioerrno;
}
/* ********************************************************************** */
/* Implementation */
/* ********************************************************************** */
/* Read the next bit of data, ensuring that (if the source has it) more
 * than 'atleast' bytes are available to scan.
 *
 * Nothing is read while more than s->atleast unread bytes remain, or once
 * eof has been seen.  Otherwise the unread tail is moved to the start of
 * the buffer and the source (CamelStream, GInputStream or fd, in that
 * order of preference) is asked to fill the rest.  A '\n' sentinel is
 * stored at s->inend for the inner scanning loops.
 *
 * A failed read is recorded in s->ioerrno; the buffer then holds just the
 * unread tail.
 *
 * Returns the number of unread bytes now in the buffer. */
static gint
folder_read (struct _header_scan_state *s)
{
	gint len;
	gint inoffset;

	if (s->inptr < s->inend - s->atleast || s->eof)
		return s->inend - s->inptr;
#ifdef PURIFY
	purify_watch_remove (inend_id);
	purify_watch_remove (inbuffer_id);
#endif
	/* check for any remaining bytes (under the atleast limit) */
	inoffset = s->inend - s->inptr;
	if (inoffset > 0) {
		memmove (s->inbuf, s->inptr, inoffset);
	}
	if (s->stream) {
		len = camel_stream_read (
			s->stream, s->inbuf + inoffset, SCAN_BUF - inoffset, NULL, NULL);
	} else if (s->input_stream != NULL) {
		len = g_input_stream_read (
			s->input_stream, s->inbuf + inoffset,
			SCAN_BUF - inoffset, NULL, NULL);
	} else {
		len = read (s->fd, s->inbuf + inoffset, SCAN_BUF - inoffset);
	}
	r (printf ("read %d bytes, offset = %d\n", len, inoffset));

	if (len < 0) {
		/* capture errno before anything else can disturb it */
		s->ioerrno = errno ? errno : EIO;
		len = -1;
	}

	/* The unread tail has been moved to the front of the buffer whether
	 * or not the read worked, so the pointers must follow it in both
	 * cases; leaving them behind after a failure would make them refer
	 * to bytes the memmove() may already have overwritten.  'seek' is
	 * adjusted so that folder_tell() is unaffected. */
	s->seek += s->inptr - s->inbuf;
	s->inptr = s->inbuf;
	if (len >= 0) {
		/* add on the last read block */
		s->inend = s->inbuf + len + inoffset;
		s->eof = (len == 0);
		r (printf ("content = %d '%.*s'\n",s->inend - s->inptr, s->inend - s->inptr, s->inptr));
	} else {
		s->inend = s->inbuf + inoffset;
	}

	g_return_val_if_fail (s->inptr <= s->inend, 0);
#ifdef PURIFY
	inend_id = purify_watch (&s->inend);
	inbuffer_id = purify_watch_n (s->inend + 1, SCAN_HEAD - 1, "rw");
#endif
	r (printf ("content = %d '%.*s'\n", s->inend - s->inptr, s->inend - s->inptr, s->inptr));
	/* set a sentinel, for the inner loops to check against */
	s->inend[0] = '\n';

	return s->inend - s->inptr;
}
/* Absolute position of the data pointer: the offset of the buffer start
 * plus the number of buffered bytes already consumed. */
static goffset
folder_tell (struct _header_scan_state *s)
{
	goffset base = s->seek;

	return base + (s->inptr - s->inbuf);
}
/*
* need some way to prime the parser state, so this actually works for
* other than top-level messages
*/
static goffset
folder_seek (struct _header_scan_state *s,
goffset offset,
gint whence)
{
goffset newoffset;
if (s->stream) {
if (G_IS_SEEKABLE (s->stream)) {
/* NOTE: assumes whence seekable stream == whence libc, which is probably
* the case (or bloody well should've been) */
g_seekable_seek (
G_SEEKABLE (s->stream),
offset, whence, NULL, NULL);
newoffset = g_seekable_tell (G_SEEKABLE (s->stream));
} else {
newoffset = -1;
errno = EINVAL;
}
} else if (s->input_stream != NULL) {
if (G_IS_SEEKABLE (s->input_stream)) {
/* NOTE: assumes whence seekable stream == whence libc, which is probably
* the case (or bloody well should've been) */
g_seekable_seek (
G_SEEKABLE (s->input_stream),
offset, whence, NULL, NULL);
newoffset = g_seekable_tell (G_SEEKABLE (s->input_stream));
} else {
newoffset = -1;
errno = EINVAL;
}
} else {
newoffset = lseek (s->fd, offset, whence);
}
#ifdef PURIFY
purify_watch_remove (inend_id);
purify_watch_remove (inbuffer_id);
#endif
if (newoffset != -1) {
s->seek = newoffset;
s->inptr = s->inbuf;
s->inend = s->inbuf;
s->eof = FALSE;
} else {
s->ioerrno = errno ? errno : EIO;
}
#ifdef PURIFY
inend_id = purify_watch (&s->inend);
inbuffer_id = purify_watch_n (s->inend + 1, SCAN_HEAD - 1, "rw");
#endif
return newoffset;
}
static void
folder_push_part (struct _header_scan_state *s,
struct _header_scan_stack *h)
{
if (s->parts && s->parts->atleast > h->boundarylenfinal)
h->atleast = s->parts->atleast;
else
h->atleast = MAX (h->boundarylenfinal, 1);
h->parent = s->parts;
s->parts = h;
}
static void
folder_pull_part (struct _header_scan_state *s)
{
struct _header_scan_stack *h;
h = s->parts;
if (h) {
s->parts = h->parent;
g_free (h->boundary);
#ifdef MEMPOOL
camel_mempool_destroy (h->pool);
#else
camel_header_raw_clear (&h->headers);
#endif
camel_content_type_unref (h->content_type);
if (h->pretext)
g_byte_array_free (h->pretext, TRUE);
if (h->posttext)
g_byte_array_free (h->posttext, TRUE);
if (h->from_line)
g_byte_array_free (h->from_line, TRUE);
g_free (h);
} else {
g_warning ("Header stack underflow!\n");
}
}
/* Consume input up to and including the next '\n'.  When 'save' is given,
 * every consumed byte (the newline included) is appended to it.
 * s->atleast is forced to 1 for the duration and restored afterwards.
 *
 * Returns 0 when a newline was found, -1 when the data ran out first. */
static gint
folder_scan_skip_line (struct _header_scan_state *s,
                       GByteArray *save)
{
	gint saved_atleast = s->atleast;
	gint result = -1;

	s->atleast = 1;
	d (printf ("skipping line\n"));

	for (;;) {
		gchar *cursor, *limit, c;
		gboolean found_eol = FALSE;
		gint len = folder_read (s);

		/* ensure we have at least enough room here */
		if (len <= 0 || len <= s->atleast)
			break;

		cursor = s->inptr;
		limit = s->inend;
		while (cursor < limit) {
			c = *cursor++;
			if (c == '\n') {
				found_eol = TRUE;
				break;
			}
			d (printf ("(%2x,%c)", c, isprint (c) ? c : '.'));
		}

		if (save != NULL)
			g_byte_array_append (save, (guint8 *) s->inptr, cursor - s->inptr);
		s->inptr = cursor;

		if (found_eol) {
			result = 0;
			break;
		}
	}

	if (result != 0) {
		d (printf ("couldn't find end of line?\n"));
	}

	s->atleast = saved_atleast;

	return result;
}
/* TODO: Is there any way to make this run faster?  It gets called a lot ... */
/* Test whether the buffered data at 'boundary' begins with the boundary
 * marker of any part on the stack, trying the innermost part first.
 *
 * On a match the part is returned and *lastone tells whether the data
 * also carries that part's final-boundary suffix; it is FALSE when too
 * little data is buffered to check.  Without a match NULL is returned and
 * *lastone is left untouched. */
static struct _header_scan_stack *
folder_boundary_check (struct _header_scan_state *s,
                       const gchar *boundary,
                       gint *lastone)
{
	struct _header_scan_stack *part;
	/* bytes available from 'boundary' on; never compare beyond them */
	gint len = s->inend - boundary;

	h (printf ("checking boundary marker upto %d bytes\n", len));

	for (part = s->parts; part != NULL; part = part->parent) {
		h (printf (" boundary: %s\n", part->boundary));
		h (printf (" against: '%.*s'\n", part->boundarylen, boundary));

		if (part->boundary == NULL
		    || part->boundarylen > len
		    || memcmp (boundary, part->boundary, part->boundarylen) != 0)
			continue;

		h (printf ("matched boundary: %s\n", part->boundary));

		/* again, make sure we're in range */
		if (part->boundarylenfinal > len) {
			h (printf ("not enough room to check last one?\n"));
			*lastone = FALSE;
		} else {
			gint extra = part->boundarylenfinal - part->boundarylen;

			/* check the extra stuff on a final boundary, normally -- for mime parts */
			if (extra > 0)
				*lastone = memcmp (&boundary[part->boundarylen],
				                   &part->boundary[part->boundarylen],
				                   extra) == 0;
			else
				*lastone = TRUE;
			h (printf ("checking lastone = %s\n", *lastone?"TRUE":"FALSE"));
		}

		return part;
	}

	return NULL;
}
#ifdef MEMPOOL
/* Split the assembled header text at its first ':' and append the
 * name/value pair to the end of h->headers, with everything allocated from
 * h's mempool.  The value runs from just after the colon up to s->outptr.
 * Text without a colon is dropped. */
static void
header_append_mempool (struct _header_scan_state *s,
                       struct _header_scan_stack *h,
                       gchar *header,
                       gint offset)
{
	struct _camel_header_raw *node;
	struct _camel_header_raw **tail;
	gchar *colon, *value;
	gint name_len, value_len;

	colon = strchr (header, ':');
	if (colon == NULL)
		return;

	node = camel_mempool_alloc (h->pool, sizeof (*node));
	node->next = NULL;

	name_len = colon - header;
	node->name = camel_mempool_alloc (h->pool, name_len + 1);
	memcpy (node->name, header, name_len);
	node->name[name_len] = 0;

	value = colon + 1;
	value_len = s->outptr - value;
	node->value = camel_mempool_alloc (h->pool, value_len + 1);
	memcpy (node->value, value, value_len);
	node->value[value_len] = 0;

	node->offset = offset;

	/* find the link that terminates the list and hang the node there */
	for (tail = &h->headers; *tail != NULL; tail = &(*tail)->next)
		;
	*tail = node;
}
/* 'a' is unused; 's' and 'h' are picked up from the calling scope */
#define header_raw_append_parse(a, b, c) (header_append_mempool(s, h, b, c))
#endif
/* Copy the bytes [start, inptr) onto the end of the header buffer
 * (s->outbuf):
 * - the buffer is grown with g_realloc() when the text would not fit
 *   with room to spare;
 * - a single trailing \r is dropped (\n's are assumed already removed);
 * - s->header_start is set to the stream offset of 'start' if it is
 *   still -1, i.e. this is the first piece of a header.
 *
 * Basically an optimised version of g_byte_array_append().
 *
 * This is a bare brace block and its arguments are expanded more than
 * once, unparenthesised: pass only simple, side-effect-free expressions. */
#define header_append(s, start, inptr) \
{ \
register gintptr headerlen = inptr - start; \
\
if (headerlen > 0) { \
if (headerlen >= (s->outend - s->outptr)) { \
register gchar *outnew; \
register gintptr olen = ((s->outend - s->outbuf) + headerlen) * 2 + 1; \
outnew = g_realloc (s->outbuf, olen); \
s->outptr = s->outptr - s->outbuf + outnew; \
s->outbuf = outnew; \
s->outend = outnew + olen; \
} \
if (start[headerlen - 1] == '\r') \
headerlen--; \
memcpy (s->outptr, start, headerlen); \
s->outptr += headerlen; \
} \
if (s->header_start == -1) \
s->header_start = (start - s->inbuf) + s->seek; \
}
/*
 * Scan one block of headers starting at s->inptr and collect them in a
 * newly allocated _header_scan_stack entry.
 *
 * Lines are accumulated in s->outbuf through header_append().  A line that
 * starts with a space or tab is joined onto the previous one; every
 * completed header is handed to header_raw_append_parse().  Scanning stops
 * at the first empty line, at a boundary recognised by
 * folder_boundary_check(), or when folder_read() delivers no more data.
 *
 * @s: scanner state; s->inptr is advanced past the consumed input and
 *   s->atleast is restored to its entry value before a normal return.
 * @lastone: set to FALSE on entry and passed on to folder_boundary_check(),
 *   which presumably flags a closing boundary through it -- TODO confirm.
 *
 * Returns the new stack entry; it is not pushed onto s->parts here.
 *
 * NOTE(review): the g_return_val_if_fail() inside the line loop returns NULL
 * without releasing `h' (or its pool) and without restoring s->atleast; the
 * caller visible in this file, folder_scan_step(), uses the result without
 * a NULL check.
 */
static struct _header_scan_stack *
folder_scan_header (struct _header_scan_state *s,
gint *lastone)
{
gint atleast = s->atleast, newatleast;
gchar *start = NULL;
gint len;
struct _header_scan_stack *h;
gchar *inend;
register gchar *inptr;
h (printf ("scanning first bit\n"));
h = g_malloc0 (sizeof (*h));
#ifdef MEMPOOL
h->pool = camel_mempool_new (8192, 4096, CAMEL_MEMPOOL_ALIGN_STRUCT);
#endif
/* use the enclosing part's minimum look-ahead while scanning, so that its
 * boundary can be checked at every line start; 1 when there is no parent */
if (s->parts)
newatleast = s->parts->atleast;
else
newatleast = 1;
*lastone = FALSE;
/* the first pass runs with the look-ahead chosen above; once the input
 * gets shorter than that, a second pass with atleast = 1 drains the rest */
do {
s->atleast = newatleast;
h (printf ("atleast = %d\n", s->atleast));
while ((len = folder_read (s))>0 && len >= s->atleast) { /* ensure we have at least enough room here */
inptr = s->inptr;
/* only scan up to the point where s->atleast bytes are still available */
inend = s->inend - s->atleast + 1;
while (inptr < inend) {
start = inptr;
if (!s->midline) {
/* at the start of a line: a boundary ends the header block */
if (folder_boundary_check (s, inptr, lastone)) {
if ((s->outptr > s->outbuf))
goto header_truncated; /* may not actually be truncated */
goto header_done;
}
}
/* goto next line/sentinel */
while ((*inptr++) != '\n')
;
g_return_val_if_fail (inptr <= s->inend + 1, NULL);
/* check for sentinel or real end of line */
if (inptr > inend) {
h (printf ("not at end of line yet, going further\n"));
/* didn't find end of line within our allowed area */
inptr = inend;
s->midline = TRUE;
header_append (s, start, inptr);
} else {
h (printf ("got line part: '%.*s'\n", inptr - 1 - start, start));
/* got a line, strip and add it, process it */
s->midline = FALSE;
header_append (s, start, inptr - 1);
/* check for end of headers */
if (s->outbuf == s->outptr)
goto header_done;
/* check for continuation/compress headers, we have atleast 1 gchar here to work with */
if (inptr[0] == ' ' || inptr[0] == '\t') {
h (printf ("continuation\n"));
#ifdef PRESERVE_HEADERS
if (inptr - 1 >= start) {
start = inptr - 1;
header_append (s, start, inptr);
}
#endif
#ifndef PRESERVE_HEADERS
/* TODO: this wont catch multiple space continuation across a read boundary, but
 * that is assumed rare, and not fatal anyway */
/* collapse the run of leading whitespace into one space, written
 * directly into the input buffer just before the continued text */
do
inptr++;
while (*inptr == ' ' || *inptr == '\t');
inptr--;
*inptr = ' ';
#endif
} else {
/* otherwise, complete header, add it */
s->outptr[0] = 0;
h (printf ("header '%s' at %d\n", s->outbuf, (gint) s->header_start));
header_raw_append_parse (&h->headers, s->outbuf, s->header_start);
/* rewind the output buffer for the next header */
s->outptr = s->outbuf;
s->header_start = -1;
}
}
}
s->inptr = inptr;
}
h (printf ("end of file? read %d bytes\n", len));
newatleast = 1;
} while (s->atleast > 1);
/* input ran out: flush a pending partial header and/or unscanned bytes */
if ((s->outptr > s->outbuf) || s->inend > s->inptr) {
start = s->inptr;
inptr = s->inend;
if (inptr > start) {
if (inptr[-1] == '\n')
inptr--;
}
goto header_truncated;
}
s->atleast = atleast;
return h;
header_truncated:
/* append whatever lies in [start, inptr) and store the header collected
 * so far, unless nothing was collected at all */
header_append (s, start, inptr);
s->outptr[0] = 0;
if (s->outbuf == s->outptr)
goto header_done;
header_raw_append_parse (&h->headers, s->outbuf, s->header_start);
s->outptr = s->outbuf;
header_done:
s->inptr = inptr;
s->atleast = atleast;
s->header_start = -1;
return h;
}
/*
 * Scan the next chunk of body content from the input buffer.
 *
 * Content is returned in place: *data points into the scanner's input
 * buffer and *length gives the number of bytes.  A chunk ends at a boundary
 * recognised by folder_boundary_check() at the start of a line, or at the
 * end of the scannable part of the buffer.
 *
 * @s: scanner state; s->inptr is advanced past the returned chunk and
 *   s->atleast is restored to its entry value before returning.
 * @lastone: set to FALSE on entry and passed on to folder_boundary_check(),
 *   which presumably flags a closing boundary through it -- TODO confirm.
 * @data: receives the start of the chunk.
 * @length: receives the chunk length; when the chunk ends on a boundary the
 *   final character (the closing \n) is not counted.
 *
 * Returns:
 *  - s->parts when content was found (even if a boundary follows it), so
 *    the caller stays in its current state;
 *  - the part returned by folder_boundary_check() when a boundary is hit
 *    with at most one byte of content in front of it;
 *  - NULL, with content still returned, at end of file in From mode;
 *  - NULL with *length == 0 when no input is left.
 */
static struct _header_scan_stack *
folder_scan_content (struct _header_scan_state *s,
gint *lastone,
gchar **data,
gsize *length)
{
gint atleast = s->atleast, newatleast;
register gchar *inptr;
gchar *inend;
gchar *start;
gint len;
struct _header_scan_stack *part;
gint onboundary = FALSE;
c (printf ("scanning content\n"));
/* use the current part's minimum look-ahead, or 1 without a part */
part = s->parts;
if (part)
newatleast = part->atleast;
else
newatleast = 1;
*lastone = FALSE;
c (printf ("atleast = %d\n", newatleast));
/* first pass with the look-ahead chosen above, then once more with
 * atleast = 1 when the remaining input is shorter than that */
do {
s->atleast = newatleast;
while ((len = folder_read (s))>0 && len >= s->atleast) { /* ensure we have at least enough room here */
inptr = s->inptr;
/* at eof everything may be scanned; otherwise keep s->atleast bytes
 * of look-ahead in reserve */
if (s->eof)
inend = s->inend;
else
inend = s->inend - s->atleast + 1;
start = inptr;
c (printf ("inptr = %p, inend = %p\n", inptr, inend));
while (inptr < inend) {
if (!s->midline
&& (part = folder_boundary_check (s, inptr, lastone))) {
onboundary = TRUE;
/* since we truncate the boundary data, we need at least 1 gchar here spare,
 * to remain in the same state */
if ( (inptr - start) > 1)
goto content;
/* otherwise, jump to the state of the boundary we actually found */
goto normal_exit;
}
/* goto the next line */
while ((*inptr++) != '\n')
;
/* check the sentinel, if we went past the atleast limit, and reset it to there */
if (inptr > inend) {
s->midline = TRUE;
inptr = inend;
} else {
s->midline = FALSE;
}
}
goto content;
}
newatleast = 1;
} while (s->atleast > 1);
c (printf ("length read = %d\n", len));
/* no more reads possible: hand out whatever is left in the buffer */
if (s->inend > s->inptr) {
start = s->inptr;
inptr = s->inend;
goto content;
}
/* nothing left at all */
*length = 0;
*data = s->inptr;
s->atleast = atleast;
return NULL;
content:
/* treat eof as the last boundary in From mode */
if (s->scan_from && s->eof && s->atleast <= 1) {
onboundary = TRUE;
part = NULL;
} else {
part = s->parts;
}
normal_exit:
s->atleast = atleast;
s->inptr = inptr;
*data = start;
/* if we hit a boundary, we should not include the closing \n */
if (onboundary && (inptr - start) > 0)
*length = inptr-start-1;
else
*length = inptr-start;
/*printf("got %scontent: '%.*s'\n", s->midline?"partial ":"", inptr-start, start);*/
return part;
}
/*
 * Destroy a scanner created by folder_scan_init().
 *
 * Frees both scan buffers, unwinds every entry left on the part stack,
 * closes the file descriptor if one is set (s->fd != -1), drops the stream
 * references and finally frees @s itself.  @s must not be used afterwards.
 */
static void
folder_scan_close (struct _header_scan_state *s)
{
	/* scan buffers */
	g_free (s->realbuf);
	g_free (s->outbuf);

	/* whatever is still on the part stack */
	while (s->parts != NULL)
		folder_pull_part (s);

	/* input sources */
	if (s->fd != -1)
		close (s->fd);
	g_clear_object (&s->stream);
	g_clear_object (&s->input_stream);

	g_free (s);
}
static struct _header_scan_state *
folder_scan_init (void)
{
struct _header_scan_state *s;
s = g_malloc (sizeof (*s));
s->fd = -1;
s->stream = NULL;
s->input_stream = NULL;
s->ioerrno = 0;
s->outbuf = g_malloc (1024);
s->outptr = s->outbuf;
s->outend = s->outbuf + 1024;
s->realbuf = g_malloc0 (SCAN_BUF + SCAN_HEAD * 2);
s->inbuf = s->realbuf + SCAN_HEAD;
s->inptr = s->inbuf;
s->inend = s->inbuf;
s->atleast = 0;
s->seek = 0; /* current character position in file of the last read block */
s->unstep = 0;
s->header_start = -1;
s->start_of_from = -1;
s->start_of_headers = -1;
s->start_of_boundary = -1;
s->midline = FALSE;
s->scan_from = FALSE;
s->scan_pre_from = FALSE;
s->eof = FALSE;
s->filters = NULL;
s->filterid = 1;
s->parts = NULL;
s->state = CAMEL_MIME_PARSER_STATE_INITIAL;
return s;
}
static void
drop_states (struct _header_scan_state *s)
{
while (s->parts) {
folder_scan_drop_step (s);
}
s->unstep = 0;
s->state = CAMEL_MIME_PARSER_STATE_INITIAL;
}
/*
 * Bring a scanner back to the state it needs before a new input source is
 * attached: parser state and part stack are dropped, the input buffer is
 * emptied, any open file descriptor is closed, stream references are
 * released and the eof/error flags are cleared.
 */
static void
folder_scan_reset (struct _header_scan_state *s)
{
	drop_states (s);

	/* empty input buffer, with a '\n' stored at the end-of-data position
	 * (the line scanners in this file stop on '\n') */
	s->inptr = s->inbuf;
	s->inend = s->inbuf;
	*s->inend = '\n';

	/* detach the previous input source, whichever kind it was */
	if (s->fd != -1) {
		close (s->fd);
		s->fd = -1;
	}
	g_clear_object (&s->stream);
	g_clear_object (&s->input_stream);

	s->eof = FALSE;
	s->ioerrno = 0;
}
/*
 * Reset the scanner and make the file descriptor @fd its input source.
 * The descriptor is stored in s->fd, which folder_scan_reset() and
 * folder_scan_close() close, so the scanner takes it over.
 *
 * NOTE(review): passing the descriptor the scanner already holds would
 * store a descriptor that the reset has just closed.
 *
 * Always returns 0.
 */
static gint
folder_scan_init_with_fd (struct _header_scan_state *s,
                          gint fd)
{
	/* drops the previous source and all parser state */
	folder_scan_reset (s);

	s->fd = fd;

	return 0;
}
/*
 * Reset the scanner and make @stream its input source.  The scanner holds
 * its own reference on the stream.
 *
 * @error is currently unused.
 *
 * Always returns 0.
 */
static gint
folder_scan_init_with_stream (struct _header_scan_state *s,
                              CamelStream *stream,
                              GError **error)
{
	CamelStream *held;

	/* Take the new reference before the reset drops the old one: if
	 * @stream is the stream the scanner already holds, and the scanner
	 * owns the last reference, resetting first would finalize the object
	 * before it is referenced again. */
	held = g_object_ref (stream);

	folder_scan_reset (s);
	s->stream = held;

	return 0;
}
#define USE_FROM
/*
 * Advance the parser by one step of its state machine.
 *
 * Depending on s->state this looks for a "From " line, scans a header
 * block and decides what kind of part follows, returns a chunk of body
 * content, collects the text around multipart boundaries, or pops a
 * finished part.  The resulting state is left in s->state.
 *
 * @databuffer / @datalength: receive the content chunk in the states that
 *   produce data (pre-From data when s->scan_pre_from is set, and body
 *   data); in the body state the chunk has been passed through the filters
 *   in s->filters.
 *
 * NOTE(review): the header-scanning state uses the result of
 * folder_scan_header() without a NULL check, although that function can
 * return NULL through its g_return_val_if_fail().
 */
static void
folder_scan_step (struct _header_scan_state *s,
gchar **databuffer,
gsize *datalength)
{
struct _header_scan_stack *h, *hb;
const gchar *content;
const gchar *bound;
gint type, state, seenlast;
CamelContentType *ct = NULL;
struct _header_scan_filter *f;
gsize presize;
gulong boundary_len;
/* printf("\nSCAN PASS: state = %d '%s'\n", s->state, states[s->state]);*/
tail_recurse:
d ({
printf ("\nSCAN STACK:\n");
printf (" '%s' :\n", states[s->state]);
hb = s->parts;
while (hb) {
printf (" '%s' : %s ", states[hb->savestate], hb->boundary);
if (hb->content_type) {
printf ("(%s/%s)", hb->content_type->type, hb->content_type->subtype);
} else {
printf ("(default)");
}
printf ("\n");
hb = hb->parent;
}
printf ("\n");
});
switch (s->state) {
#ifdef USE_FROM
case CAMEL_MIME_PARSER_STATE_INITIAL:
if (s->scan_from) {
/* From mode: push a pseudo part whose boundary is the "From " line
 * prefix, then look for the first such line */
h = g_malloc0 (sizeof (*h));
h->boundary = g_strdup ("From ");
h->boundarylen = strlen (h->boundary);
h->boundarylenfinal = h->boundarylen;
h->from_line = g_byte_array_new ();
folder_push_part (s, h);
s->state = CAMEL_MIME_PARSER_STATE_PRE_FROM;
goto scan_pre_from;
} else {
s->start_of_from = -1;
goto scan_header;
}
case CAMEL_MIME_PARSER_STATE_PRE_FROM:
scan_pre_from:
h = s->parts;
/* skip (or, with scan_pre_from set, return) everything in front of the
 * next "From " line */
do {
hb = folder_scan_content (s, &state, databuffer, datalength);
if (s->scan_pre_from && *datalength > 0) {
d (printf ("got pre-from content %d bytes\n", *datalength));
return;
}
} while (hb == h && *datalength > 0);
if (*datalength == 0 && hb == h) {
d (printf ("found 'From '\n"));
s->start_of_from = folder_tell (s);
/* consume the From line itself; h->from_line is handed to the skip
 * helper, presumably to receive the line -- TODO confirm */
folder_scan_skip_line (s, h->from_line);
h->savestate = CAMEL_MIME_PARSER_STATE_INITIAL;
s->state = CAMEL_MIME_PARSER_STATE_FROM;
} else {
/* no further From line: drop the pseudo part, the input is finished */
folder_pull_part (s);
s->state = CAMEL_MIME_PARSER_STATE_EOF;
}
return;
#else
case CAMEL_MIME_PARSER_STATE_INITIAL:
case CAMEL_MIME_PARSER_STATE_PRE_FROM:
#endif /* USE_FROM */
scan_header:
case CAMEL_MIME_PARSER_STATE_FROM:
/* scan a header block and decide from its Content-Type which state
 * comes next */
s->start_of_headers = folder_tell (s);
h = folder_scan_header (s, &state);
/* savestate is the state taken up again once this part is popped */
#ifdef USE_FROM
if (s->scan_from)
h->savestate = CAMEL_MIME_PARSER_STATE_FROM_END;
else
#endif
h->savestate = CAMEL_MIME_PARSER_STATE_EOF;
/* FIXME: should this check for MIME-Version: 1.0 as well? */
type = CAMEL_MIME_PARSER_STATE_HEADER;
if ((content = camel_header_raw_find (&h->headers, "Content-Type", NULL))
&& (ct = camel_content_type_decode (content))) {
if (!g_ascii_strcasecmp (ct->type, "multipart")) {
/* multipart/signed is deliberately not scanned as a multipart here;
 * like a multipart without boundary parameter it stays a plain part */
if (!camel_content_type_is (ct, "multipart", "signed")
&& (bound = camel_content_type_param (ct, "boundary"))) {
d (printf ("multipart, boundary = %s\n", bound));
/* the stored string is "--<bound>--": boundarylen covers "--<bound>",
 * boundarylenfinal additionally the closing "--"; the allocation adds
 * room for that "--" and the terminating NUL */
h->boundarylen = strlen (bound) + 2;
h->boundarylenfinal = h->boundarylen + 2;
boundary_len = h->boundarylen + 3;
h->boundary = g_malloc (boundary_len);
g_snprintf (h->boundary, boundary_len, "--%s--", bound);
type = CAMEL_MIME_PARSER_STATE_MULTIPART;
} else {
/*camel_content_type_unref(ct);
ct = camel_content_type_decode ("text/plain");*/
/* We can't quite do this, as it will mess up all the offsets ... */
/* camel_header_raw_replace(&h->headers, "Content-Type", "text/plain", offset); */
/*g_warning("Multipart with no boundary, treating as text/plain");*/
}
} else if (!g_ascii_strcasecmp (ct->type, "message")) {
if (!g_ascii_strcasecmp (ct->subtype, "rfc822")
|| !g_ascii_strcasecmp (ct->subtype, "news")
/*|| !g_ascii_strcasecmp(ct->subtype, "partial")*/) {
type = CAMEL_MIME_PARSER_STATE_MESSAGE;
}
}
} else {
/* no usable Content-Type header */
/* make the default type for multipart/digest be message/rfc822 */
if ((s->parts
&& camel_content_type_is (s->parts->content_type, "multipart", "digest"))) {
ct = camel_content_type_decode ("message/rfc822");
type = CAMEL_MIME_PARSER_STATE_MESSAGE;
d (printf ("parent was multipart/digest, autoupgrading to message/rfc822?\n"));
/* maybe we should do this too?
 * header_raw_append_parse(&h->headers, "Content-Type: message/rfc822", -1);*/
} else {
ct = camel_content_type_decode ("text/plain");
}
}
h->content_type = ct;
folder_push_part (s, h);
s->state = type;
return;
case CAMEL_MIME_PARSER_STATE_HEADER:
s->state = CAMEL_MIME_PARSER_STATE_BODY;
/* coverity[fallthrough] */
case CAMEL_MIME_PARSER_STATE_BODY:
h = s->parts;
*datalength = 0;
presize = SCAN_HEAD;
f = s->filters;
/* as the body returns as soon as data was found, this loop runs at most
 * once per call */
do {
hb = folder_scan_content (s, &state, databuffer, datalength);
d (printf ("\n\nOriginal content: '"));
d (fwrite (*databuffer, sizeof (gchar), *datalength, stdout));
d (printf ("'\n"));
if (*datalength > 0) {
/* run the chunk through the filter chain, each filter working on the
 * output of the previous one */
while (f) {
camel_mime_filter_filter (
f->filter,
*databuffer, *datalength, presize,
databuffer, datalength, &presize);
d (fwrite (*databuffer, sizeof (gchar), *datalength, stdout));
d (printf ("'\n"));
f = f->next;
}
return;
}
} while (hb == h && *datalength > 0);
/* check for any filter completion data */
while (f) {
camel_mime_filter_complete (
f->filter, *databuffer, *datalength, presize,
databuffer, datalength, &presize);
f = f->next;
}
/* the filters may have flushed buffered output; return it as body data */
if (*datalength > 0)
return;
s->state = CAMEL_MIME_PARSER_STATE_BODY_END;
break;
case CAMEL_MIME_PARSER_STATE_MULTIPART:
h = s->parts;
/* This mess looks for the next boundary on this
 * level. Once it finds the last one, it keeps going,
 * looking for post-multipart content ('postface').
 * Because messages might have duplicate boundaries for
 * different parts, it makes sure it stops if its already
 * found an end boundary for this part. It handles
 * truncated and missing boundaries appropriately too. */
seenlast = FALSE;
do {
do {
hb = folder_scan_content (s, &state, databuffer, datalength);
if (*datalength > 0) {
/* instead of a new state, we'll just store it locally and provide
 * an accessor function */
d (printf (
"Multipart %s Content %p: '%.*s'\n",
h->prestage > 0 ? "post" : "pre",
h, *datalength, *databuffer));
/* prestage == 0: text in front of the first boundary; afterwards the
 * text is collected as post text */
if (h->prestage > 0) {
if (h->posttext == NULL)
h->posttext = g_byte_array_new ();
g_byte_array_append (h->posttext, (guint8 *) *databuffer, *datalength);
} else {
if (h->pretext == NULL)
h->pretext = g_byte_array_new ();
g_byte_array_append (h->pretext, (guint8 *) *databuffer, *datalength);
}
}
} while (hb == h && *datalength > 0);
h->prestage++;
if (*datalength == 0 && hb == h && !seenlast) {
d (printf ("got boundary: %s last=%d\n", hb->boundary, state));
s->start_of_boundary = folder_tell (s);
folder_scan_skip_line (s, NULL);
if (!state) {
/* not the closing boundary: scan the headers of the next sub-part
 * right away and make it return to this multipart when it is done */
s->state = CAMEL_MIME_PARSER_STATE_FROM;
folder_scan_step (s, databuffer, datalength);
s->parts->savestate = CAMEL_MIME_PARSER_STATE_MULTIPART; /* set return state for the new head part */
return;
} else
seenlast = TRUE;
} else {
break;
}
} while (1);
s->state = CAMEL_MIME_PARSER_STATE_MULTIPART_END;
break;
case CAMEL_MIME_PARSER_STATE_MESSAGE:
/* an embedded message: scan its headers now; the new part returns to
 * MESSAGE_END when it is popped */
s->state = CAMEL_MIME_PARSER_STATE_FROM;
folder_scan_step (s, databuffer, datalength);
s->parts->savestate = CAMEL_MIME_PARSER_STATE_MESSAGE_END;
break;
case CAMEL_MIME_PARSER_STATE_FROM_END:
case CAMEL_MIME_PARSER_STATE_BODY_END:
case CAMEL_MIME_PARSER_STATE_MULTIPART_END:
case CAMEL_MIME_PARSER_STATE_MESSAGE_END:
/* pop the finished part and continue in the state it saved; another
 * *_END state is reported to the caller, anything else is processed
 * immediately */
s->state = s->parts->savestate;
folder_pull_part (s);
if (s->state & CAMEL_MIME_PARSER_STATE_END)
return;
goto tail_recurse;
case CAMEL_MIME_PARSER_STATE_EOF:
return;
default:
g_warning ("Invalid state in camel-mime-parser: %u", s->state);
break;
}
return;
}
/*
 * Drops the current state back one.
 *
 *  - INITIAL: nothing to do.
 *  - EOF: back to INITIAL, the part stack is left alone.
 *  - FROM / PRE_FROM: pop the top part and go back to INITIAL.
 *  - MESSAGE, HEADER, MULTIPART and all *_END states: pop the top part and
 *    continue in the state it had saved, with the END flag removed.
 *  - any other state (CAMEL_MIME_PARSER_STATE_BODY among them) is left
 *    completely unchanged.
 */
static void
folder_scan_drop_step (struct _header_scan_state *s)
{
	switch (s->state) {
	case CAMEL_MIME_PARSER_STATE_INITIAL:
		break;

	case CAMEL_MIME_PARSER_STATE_EOF:
		s->state = CAMEL_MIME_PARSER_STATE_INITIAL;
		break;

	case CAMEL_MIME_PARSER_STATE_PRE_FROM:
	case CAMEL_MIME_PARSER_STATE_FROM:
		s->state = CAMEL_MIME_PARSER_STATE_INITIAL;
		folder_pull_part (s);
		break;

	case CAMEL_MIME_PARSER_STATE_HEADER:
	case CAMEL_MIME_PARSER_STATE_MESSAGE:
	case CAMEL_MIME_PARSER_STATE_MULTIPART:
	case CAMEL_MIME_PARSER_STATE_FROM_END:
	case CAMEL_MIME_PARSER_STATE_BODY_END:
	case CAMEL_MIME_PARSER_STATE_MULTIPART_END:
	case CAMEL_MIME_PARSER_STATE_MESSAGE_END:
		/* read the saved state before the part is released */
		s->state = s->parts->savestate;
		folder_pull_part (s);
		/* clearing the END flag is a no-op when it is not set */
		s->state &= ~CAMEL_MIME_PARSER_STATE_END;
		break;

	default:
		/* FIXME: not sure if this is entirely right */
		break;
	}
}
#ifdef STANDALONE
/*
 * Stand-alone test driver: runs the scanner over every file named on the
 * command line and prints each parser step.  The filter hooks are compiled
 * out (#if 0), so content is only printed, never decoded.
 *
 * NOTE(review): the default mailbox name is only printed; with no
 * arguments the loop below does not open anything.
 */
gint main (gint argc, gchar **argv)
{
	gint fd;
	struct _header_scan_state *s;
	gchar *data;
	gsize len;
	const gchar *name = "/tmp/evmail/Inbox";
	struct _header_scan_stack *h; /* only used by the disabled block below */
	gint i;
	gint attach = 0;

	if (argc == 2)
		name = argv[1];

	printf ("opening: %s", name);

	for (i = 1; i < argc; i++) {
		const gchar *encoding = NULL, *charset = NULL;
		gchar *attachname;

		name = argv[i];
		printf ("opening: %s", name);

		fd = g_open (name, O_RDONLY | O_BINARY, 0);
		if (fd==-1) {
			perror ("Cannot open mailbox");
			exit (1);
		}

		s = folder_scan_init ();
		folder_scan_init_with_fd (s, fd);
		s->scan_from = FALSE;
#if 0
		h = g_malloc0 (sizeof (*h));
		h->savestate = CAMEL_MIME_PARSER_STATE_EOF;
		folder_push_part (s, h);
#endif
		while (s->state != CAMEL_MIME_PARSER_STATE_EOF) {
			folder_scan_step (s, &data, &len);
			printf ("\n -- PARSER STEP RETURN -- %d '%s'\n\n", s->state, states[s->state]);
			switch (s->state) {
			case CAMEL_MIME_PARSER_STATE_HEADER:
				if (s->parts->content_type
				    && (charset = camel_content_type_param (s->parts->content_type, "charset"))) {
					if (g_ascii_strcasecmp (charset, "us-ascii")) {
#if 0
						folder_push_filter_charset (s, "UTF-8", charset);
#endif
					} else {
						charset = NULL;
					}
				} else {
					charset = NULL;
				}

				encoding = camel_header_raw_find (&s->parts->headers, "Content-transfer-encoding", NULL);
				/* the header may be missing; never hand NULL to %s */
				printf ("encoding = '%s'\n", encoding ? encoding : "(null)");
				if (encoding && !g_ascii_strncasecmp (encoding, " base64", 7)) {
					printf ("adding base64 filter\n");
					attachname = g_strdup_printf ("attach.%d.%d", i, attach++);
#if 0
					folder_push_filter_save (s, attachname);
#endif
					g_free (attachname);
#if 0
					folder_push_filter_mime (s, 0);
#endif
				}
				if (encoding && !g_ascii_strncasecmp (encoding, " quoted-printable", 17)) {
					printf ("adding quoted-printable filter\n");
					attachname = g_strdup_printf ("attach.%d.%d", i, attach++);
#if 0
					folder_push_filter_save (s, attachname);
#endif
					g_free (attachname);
#if 0
					folder_push_filter_mime (s, 1);
#endif
				}
				break;
			case CAMEL_MIME_PARSER_STATE_BODY:
				/* len is a gsize: print it with its own format, and pass
				 * an int for the '*' precision as printf requires */
				printf ("got body %" G_GSIZE_FORMAT " '%.*s'\n", len, (gint) len, data);
				break;
			case CAMEL_MIME_PARSER_STATE_BODY_END:
				printf ("end body %" G_GSIZE_FORMAT " '%.*s'\n", len, (gint) len, data);
				if (encoding && !g_ascii_strncasecmp (encoding, " base64", 7)) {
					printf ("removing filters\n");
#if 0
					folder_filter_pull (s);
					folder_filter_pull (s);
#endif
				}
				if (encoding && !g_ascii_strncasecmp (encoding, " quoted-printable", 17)) {
					printf ("removing filters\n");
#if 0
					folder_filter_pull (s);
					folder_filter_pull (s);
#endif
				}
				if (charset) {
#if 0
					folder_filter_pull (s);
#endif
					charset = NULL;
				}
				encoding = NULL;
				break;
			default:
				break;
			}
		}
		/* folder_scan_close() closes s->fd, which is the descriptor opened
		 * above, so it must not be closed a second time here */
		folder_scan_close (s);
	}
	return 0;
}
#endif /* STANDALONE */