Diffstat (limited to 'lib/download.c')
-rw-r--r-- | lib/download.c | 484
1 files changed, 484 insertions, 0 deletions
diff --git a/lib/download.c b/lib/download.c
new file mode 100644
index 000000000..fb0cb60ea
--- /dev/null
+++ b/lib/download.c
@@ -0,0 +1,484 @@
+/*****************************************************************************
+ *                                  _   _ ____  _
+ *  Project                     ___| | | |  _ \| |
+ *                             / __| | | | |_) | |
+ *                            | (__| |_| |  _ <| |___
+ *                             \___|\___/|_| \_\_____|
+ *
+ * The contents of this file are subject to the Mozilla Public License
+ * Version 1.0 (the "License"); you may not use this file except in
+ * compliance with the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS"
+ * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
+ * License for the specific language governing rights and limitations
+ * under the License.
+ *
+ * The Original Code is Curl.
+ *
+ * The Initial Developer of the Original Code is Daniel Stenberg.
+ *
+ * Portions created by the Initial Developer are Copyright (C) 1998.
+ * All Rights Reserved.
+ *
+ * ------------------------------------------------------------
+ * Main author:
+ * - Daniel Stenberg <Daniel.Stenberg@haxx.nu>
+ *
+ *   http://curl.haxx.nu
+ *
+ * $Source$
+ * $Revision$
+ * $Date$
+ * $Author$
+ * $State$
+ * $Locker$
+ *
+ * ------------------------------------------------------------
+ ****************************************************************************/
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "setup.h"
+
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+#ifdef HAVE_SYS_SELECT_H
+#include <sys/select.h>
+#endif
+
+#include "urldata.h"
+#include <curl/curl.h>
+
+#ifdef __BEOS__
+#include <net/socket.h>
+#endif
+
+#ifdef WIN32
+#if !defined( __GNUC__) || defined(__MINGW32__)
+#include <winsock.h>
+#endif
+#include <time.h> /* for the time_t typedef! */
+
+#if defined(__GNUC__) && defined(TIME_WITH_SYS_TIME)
+#include <sys/time.h>
+#endif
+
+#endif
+
+#include "progress.h"
+#include "speedcheck.h"
+#include "sendf.h"
+
+#ifdef USE_ZLIB
+#include <zlib.h>
+#endif
+
+#define MAX(x,y) ((x)>(y)?(x):(y))
+
+/* --- download a stream from a socket --- */
+
+/* This newly edited version of Download() was brought to us by the friendly
+   Mark Butler <butlerm@xmission.com>. Re-indented with the indent command. */
+
+UrgError
+Download (struct UrlData *data,
+          int sockfd,       /* socket to read from */
+          int size,         /* -1 if unknown at this point */
+          bool getheader,   /* TRUE if header parsing is wanted */
+          long *bytecountp  /* return number of bytes read */
+)
+{
+  char *buf = data->buffer;
+  size_t nread;
+  int bytecount = 0;
+  long contentlength=0;
+  struct timeval start = tvnow();
+  struct timeval now = start;
+  bool header = TRUE;
+  int headerline = 0;  /* counts header lines to better track the first one */
+
+  char *hbufp;                 /* points at *end* of header line */
+  int hbuflen = 0;
+  char *str;                   /* within buf */
+  char *str_start;             /* within buf */
+  char *end_ptr;               /* within buf */
+  char *p;                     /* within headerbuff */
+  bool content_range = FALSE;  /* set TRUE if Content-Range: was found */
+  int offset = 0;              /* possible resume offset read from the
+                                  Content-Range: header */
+  int code = 0;                /* error code from the 'HTTP/1.? XXX' line */
+#ifdef USE_ZLIB
+  gzFile gzfile=NULL;
+#endif
+
+  /* for the low speed checks: */
+  UrgError urg;
+  time_t timeofdoc=0;
+  long bodywrites=0;
+
+  char newurl[URL_MAX_LENGTH]; /* buffer for Location: URL */
+
+  hbufp = data->headerbuff;
+
+  myalarm (0);                 /* switch off the alarm-style timeout */
+
+  now = tvnow();
+  start = now;
+
+  if (!getheader) {
+    header = FALSE;
+    ProgressInit (data, size);
+  }
+  {
+    fd_set readfd;
+    fd_set keepfd;
+    struct timeval interval;
+    bool keepon = TRUE;
+
+    /* timeout every X second
+       - makes a better progressmeter (i.e even when no data is read, the
+         meter can be updated and reflect reality)
+       - allows removal of the alarm() crap
+       - variable timeout is easier
+     */
+
+    FD_ZERO (&readfd);         /* clear it */
+    FD_SET (sockfd, &readfd);
+
+    keepfd = readfd;
+#ifdef USE_ZLIB
+    gzfile = gzdopen(sockfd, "rb");
+#endif
+    while (keepon) {
+      readfd = keepfd;         /* set this every lap in the loop */
+      interval.tv_sec = 2;
+      interval.tv_usec = 0;
+
+      switch (select (sockfd + 1, &readfd, NULL, NULL, &interval)) {
+      case -1:                 /* error, stop reading */
+        keepon = FALSE;
+        continue;
+      case 0:                  /* timeout */
+        break;
+      default:                 /* read! */
+#ifdef USE_SSLEAY
+        if (data->use_ssl) {
+          nread = SSL_read (data->ssl, buf, BUFSIZE - 1);
+        }
+        else {
+#endif
+#ifdef USE_ZLIB
+          nread = gzread(gzfile, buf, BUFSIZE -1 );
+#else
+          nread = sread (sockfd, buf, BUFSIZE - 1);
+#endif
+#ifdef USE_SSLEAY
+        }
+#endif /* USE_SSLEAY */
+
+        /* NULL terminate, allowing string ops to be used */
+        if (0 < (signed int) nread)
+          buf[nread] = 0;
+
+        /* if we receive 0 or less here, the server closed the connection and
+           we bail out from this! */
+        else if (0 >= (signed int) nread) {
+          keepon = FALSE;
+          break;
+        }
+
+        str = buf;             /* Default buffer to use when we write the
+                                  buffer, it may be changed in the flow below
+                                  before the actual storing is done. */
+
+        /* Since this is a two-state thing, we check if we are parsing
+           headers at the moment or not. */
+
+        if (header) {
+          /* we are in parse-the-header-mode */
+
+          /* header line within buffer loop */
+          do {
+            int hbufp_index;
+
+            str_start = str;   /* str_start is start of line within buf */
+
+            end_ptr = strchr (str_start, '\n');
+
+            if (!end_ptr) {
+              /* no more complete header lines within buffer */
+              /* copy what is remaining into headerbuff */
+              int str_length = (int)strlen(str);
+
+              if (hbuflen + (int)str_length >= data->headersize) {
+                char *newbuff;
+                long newsize=MAX((hbuflen+str_length)*3/2,
+                                 data->headersize*2);
+                hbufp_index = hbufp - data->headerbuff;
+                newbuff = (char *)realloc(data->headerbuff, newsize);
+                if(!newbuff) {
+                  failf (data, "Failed to alloc memory for big header!");
+                  return URG_READ_ERROR;
+                }
+                data->headersize=newsize;
+                data->headerbuff = newbuff;
+                hbufp = data->headerbuff + hbufp_index;
+              }
+              strcpy (hbufp, str);
+              hbufp += strlen (str);
+              hbuflen += strlen (str);
+              break;           /* read more and try again */
+            }
+
+            str = end_ptr + 1; /* move just past new line */
+
+            if (hbuflen + (str - str_start) >= data->headersize) {
+              char *newbuff;
+              long newsize=MAX((hbuflen+(str-str_start))*3/2,
+                               data->headersize*2);
+              hbufp_index = hbufp - data->headerbuff;
+              newbuff = (char *)realloc(data->headerbuff, newsize);
+              if(!newbuff) {
+                failf (data, "Failed to alloc memory for big header!");
+                return URG_READ_ERROR;
+              }
+              data->headersize= newsize;
+              data->headerbuff = newbuff;
+              hbufp = data->headerbuff + hbufp_index;
+            }
+
+            /* copy to end of line */
+            strncpy (hbufp, str_start, str - str_start);
+            hbufp += str - str_start;
+            hbuflen += str - str_start;
+            *hbufp = 0;
+
+            p = data->headerbuff;
+
+            /* we now have a full line that p points to */
+            if (('\n' == *p) || ('\r' == *p)) {
+              /* Zero-length line means end of header! */
+              if (-1 != size)  /* if known */
+                size += bytecount; /* we append the already read size */
+
+              if ('\r' == *p)
+                p++;           /* pass the \r byte */
+              if ('\n' == *p)
+                p++;           /* pass the \n byte */
+
+              ProgressInit (data, size); /* init progress meter */
+              header = FALSE;  /* no more header to parse! */
+
+              /* now, only output this if the header AND body are requested: */
+              if ((data->conf & (CONF_HEADER | CONF_NOBODY)) == CONF_HEADER) {
+                if((p - data->headerbuff) !=
+                   data->fwrite (data->headerbuff, 1,
+                                 p - data->headerbuff, data->out)) {
+                  failf (data, "Failed writing output");
+                  return URG_WRITE_ERROR;
+                }
+              }
+              if(data->writeheader) {
+                /* obviously, the header is requested to be written to
+                   this file: */
+                if((p - data->headerbuff) !=
+                   fwrite (data->headerbuff, 1, p - data->headerbuff,
+                           data->writeheader)) {
+                  failf (data, "Failed writing output");
+                  return URG_WRITE_ERROR;
+                }
+              }
+              break;           /* exit header line loop */
+            }
+
+            if (!headerline++) {
+              /* This is the first header, it MUST be the error code line
+                 or else we consider this to be the body right away! */
+              if (sscanf (p, " HTTP/1.%*c %3d", &code)) {
+                /* 404 -> URL not found! */
+                if (
+                    ( ((data->conf & CONF_FOLLOWLOCATION) && (code >= 400)) ||
+                      !(data->conf & CONF_FOLLOWLOCATION) && (code >= 300))
+                    && (data->conf & CONF_FAILONERROR)) {
+                  /* If we have been told to fail hard on HTTP-errors,
+                     here is the check for that: */
+                  /* serious error, go home! */
+                  failf (data, "The requested file was not found");
+                  return URG_HTTP_NOT_FOUND;
+                }
+              }
+              else {
+                header = FALSE; /* this is not a header line */
+                break;
+              }
+            }
+            /* check for Content-Length: header lines to get size */
+            if (strnequal("Content-Length", p, 14) &&
+                sscanf (p+14, ": %ld", &contentlength))
+              size = contentlength;
+            else if (strnequal("Content-Range", p, 13) &&
+                     sscanf (p+13, ": bytes %d-", &offset)) {
+              if (data->resume_from == offset) {
+                /* we asked for a resume and we got it */
+                content_range = TRUE;
+              }
+            }
+            else if(data->cookies &&
+                    strnequal("Set-Cookie: ", p, 11)) {
+              cookie_add(data->cookies, TRUE, &p[12]);
+            }
+            else if(strnequal("Last-Modified:", p, strlen("Last-Modified:")) &&
+                    data->timecondition) {
+              time_t secs=time(NULL);
+              timeofdoc = get_date(p+strlen("Last-Modified:"), &secs);
+            }
+            else if ((code >= 300 && code < 400) &&
+                     (data->conf & CONF_FOLLOWLOCATION) &&
+                     strnequal("Location", p, 8) &&
+                     sscanf (p+8, ": %" URL_MAX_LENGTH_TXT "s", newurl)) {
+              /* this is the URL that the server advises us to get
+                 instead */
+              data->newurl = strdup (newurl);
+            }
+
+            if (data->conf & CONF_HEADER) {
+              if(hbuflen != data->fwrite (p, 1, hbuflen, data->out)) {
+                failf (data, "Failed writing output");
+                return URG_WRITE_ERROR;
+              }
+            }
+            if(data->writeheader) {
+              /* the header is requested to be written to this file */
+              if(hbuflen != fwrite (p, 1, hbuflen, data->writeheader)) {
+                failf (data, "Failed writing output");
+                return URG_WRITE_ERROR;
+              }
+            }
+
+            /* reset hbufp pointer && hbuflen */
+            hbufp = data->headerbuff;
+            hbuflen = 0;
+          }
+          while (*str);        /* header line within buffer */
+
+          /* We might have reached the end of the header part here, but
+             there might be a non-header part left in the end of the read
+             buffer. */
+
+          if (!header) {
+            /* the next token and forward is not part of
+               the header! */
+
+            /* we subtract the remaining header size from the buffer */
+            nread -= (str - buf);
+          }
+
+        }                      /* end if header mode */
+
+        /* This is not an 'else if' since it may be a rest from the header
+           parsing, where the beginning of the buffer is headers and the end
+           is non-headers. */
+        if (str && !header && (nread > 0)) {
+
+          if(0 == bodywrites) {
+            /* These checks are only made the first time we are about to
+               write a chunk of the body */
+            if(data->conf&CONF_HTTP) {
+              /* HTTP-only checks */
+              if (data->resume_from && !content_range ) {
+                /* we wanted to resume a download, although the server doesn't
+                   seem to support this */
+                failf (data, "HTTP server doesn't seem to support byte ranges. Cannot resume.");
+                return URG_HTTP_RANGE_ERROR;
+              }
+              else if (data->newurl) {
+                /* abort after the headers if "follow Location" is set */
+                infof (data, "Follow to new URL: %s\n", data->newurl);
+                return URG_OK;
+              }
+              else if(data->timecondition && !data->range) {
+                /* A time condition has been set AND no ranges have been
+                   requested. This seems to be what chapter 13.3.4 of RFC 2616
+                   defines to be the correct action for a HTTP/1.1 client */
+                if((timeofdoc > 0) && (data->timevalue > 0)) {
+                  switch(data->timecondition) {
+                  case TIMECOND_IFMODSINCE:
+                  default:
+                    if(timeofdoc < data->timevalue) {
+                      infof(data, "The requested document is not new enough");
+                      return URG_OK;
+                    }
+                    break;
+                  case TIMECOND_IFUNMODSINCE:
+                    if(timeofdoc > data->timevalue) {
+                      infof(data, "The requested document is not old enough");
+                      return URG_OK;
+                    }
+                    break;
+                  } /* switch */
+                } /* two valid time strings */
+              } /* we have a time condition */
+            } /* this is HTTP */
+          } /* this is the first time we write a body part */
+          bodywrites++;
+
+          if(data->maxdownload &&
+             (bytecount + nread > data->maxdownload)) {
+            nread = data->maxdownload - bytecount;
+            if(nread < 0 )     /* this should be unusual */
+              nread = 0;
+            keepon = FALSE;    /* we're done now! */
+          }
+
+          bytecount += nread;
+
+          if (nread != data->fwrite (str, 1, nread, data->out)) {
+            failf (data, "Failed writing output");
+            return URG_WRITE_ERROR;
+          }
+
+        }
+        break;
+      }
+      now = tvnow();
+      if (!header) {
+        ProgressShow (data, bytecount, start, now, FALSE);
+      }
+      urg = speedcheck (data, now);
+      if (urg)
+        return urg;
+
+      if (data->timeout && (tvdiff (now, start) > data->timeout)) {
+        failf (data, "Operation timed out with %d out of %d bytes received",
+               bytecount, size);
+        return URG_OPERATION_TIMEOUTED;
+      }
+#ifdef MULTIDOC
+      if(contentlength && bytecount >= contentlength) {
+        /* we're done with this download, now stop it */
+        break;
+      }
+#endif
+    }
+  }
+  if(contentlength && (bytecount != contentlength)) {
+    failf(data, "transfer closed with %d bytes remaining",
+          contentlength-bytecount);
+    return URG_PARTIAL_FILE;
+  }
+  ProgressShow (data, bytecount, start, now, TRUE);
+
+  *bytecountp = bytecount;
+
+#ifdef USE_ZLIB
+  gzclose(gzfile);
+#endif
+  return URG_OK;
+}
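Editor's note on the header buffering above: Download() collects header lines into data->headerbuff and, when the buffer runs out, reallocates it to at least 1.5 times the needed size and never less than double the old size, so repeated growth stays cheap. The following is a minimal standalone sketch of that growth policy only; the hdrbuf struct and hdr_append() helper are illustrative names invented for this note, not curl code.

/* sketch: grow-on-demand header buffer, policy as in Download() above */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAX(x,y) ((x)>(y)?(x):(y))

struct hdrbuf {
  char *mem;   /* buffer memory */
  long size;   /* allocated bytes */
  long len;    /* bytes currently used */
};

/* append n bytes from src, growing the buffer when needed */
static int hdr_append(struct hdrbuf *h, const char *src, long n)
{
  if(h->len + n + 1 >= h->size) {
    /* at least 1.5x what we need, but never less than double the old size */
    long newsize = MAX((h->len + n + 1) * 3 / 2, h->size * 2);
    char *newmem = realloc(h->mem, newsize);
    if(!newmem)
      return -1;              /* old buffer stays valid on failure */
    h->mem = newmem;
    h->size = newsize;
  }
  memcpy(h->mem + h->len, src, n);
  h->len += n;
  h->mem[h->len] = 0;         /* keep it usable as a C string */
  return 0;
}

int main(void)
{
  struct hdrbuf h = { malloc(16), 16, 0 };
  const char *lines[] = {
    "Content-Length: 1234\r\n",
    "Last-Modified: Thu, 01 Jan 1998 00:00:00 GMT\r\n"
  };
  int i;

  for(i = 0; i < 2; i++)
    if(hdr_append(&h, lines[i], (long)strlen(lines[i])))
      return 1;

  printf("%s", h.mem);        /* both header lines, accumulated in one buffer */
  free(h.mem);
  return 0;
}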
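The read loop itself polls the socket with select() and a two-second timeout, so the progress meter can be refreshed even when no data arrives and the old alarm()-based timeout is no longer needed. Below is that pattern in isolation, assuming an already-connected socket descriptor; poll_read() and show_progress() are placeholder names for this sketch, and plain read()/fwrite() stand in for curl's sread() and data->fwrite callback.

/* sketch: select()-driven read loop with a 2-second progress tick */
#include <stdio.h>
#include <unistd.h>
#include <sys/select.h>
#include <sys/time.h>

#define BUFSIZE 4096

static void show_progress(long total)   /* placeholder progress meter */
{
  fprintf(stderr, "\r%ld bytes", total);
}

long poll_read(int sockfd, FILE *out)
{
  char buf[BUFSIZE];
  long total = 0;
  int keepon = 1;

  while(keepon) {
    fd_set readfd;
    struct timeval interval;

    FD_ZERO(&readfd);
    FD_SET(sockfd, &readfd);
    interval.tv_sec = 2;      /* re-armed every lap: select() may modify it */
    interval.tv_usec = 0;

    switch(select(sockfd + 1, &readfd, NULL, NULL, &interval)) {
    case -1:                  /* error: stop reading */
      keepon = 0;
      break;
    case 0:                   /* timeout: no data, just refresh the meter */
      break;
    default: {
      ssize_t nread = read(sockfd, buf, sizeof(buf));
      if(nread <= 0) {        /* 0 means the peer closed the connection */
        keepon = 0;
        break;
      }
      fwrite(buf, 1, (size_t)nread, out);
      total += nread;
      break;
    }
    }
    show_progress(total);     /* runs on data *and* on timeout */
  }
  return total;
}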
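Finally, the status line and header values are picked apart with sscanf(): the format " HTTP/1.%*c %3d" skips the minor-version character and reads a three-digit status code, while "Content-Length" is matched by prefix before its value is parsed. A small standalone demonstration follows; curl's strnequal() is a case-insensitive compare, so plain strncmp() only stands in for it here.

/* sketch: sscanf-based status-line and Content-Length parsing */
#include <stdio.h>
#include <string.h>

int main(void)
{
  const char *status = "HTTP/1.0 302 Found";
  const char *clen   = "Content-Length: 1832";
  int code = 0;
  long contentlength = 0;

  /* %*c consumes the minor-version digit without storing it */
  if(sscanf(status, " HTTP/1.%*c %3d", &code) == 1)
    printf("status code: %d\n", code);              /* prints 302 */

  /* prefix match first, then read the numeric value after the colon */
  if(!strncmp("Content-Length", clen, 14) &&
     sscanf(clen + 14, ": %ld", &contentlength) == 1)
    printf("content length: %ld\n", contentlength); /* prints 1832 */

  return 0;
}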