summaryrefslogtreecommitdiff
path: root/ACE/ACEXML/common/HttpCharStream.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'ACE/ACEXML/common/HttpCharStream.cpp')
-rw-r--r--ACE/ACEXML/common/HttpCharStream.cpp464
1 files changed, 464 insertions, 0 deletions
diff --git a/ACE/ACEXML/common/HttpCharStream.cpp b/ACE/ACEXML/common/HttpCharStream.cpp
new file mode 100644
index 00000000000..b6f5e6aa423
--- /dev/null
+++ b/ACE/ACEXML/common/HttpCharStream.cpp
@@ -0,0 +1,464 @@
+// $Id$
+
+#include "ace/ACE.h"
+#include "ace/ace_wchar.h"
+#include "ace/Auto_Ptr.h"
+#include "ace/OS_NS_stdio.h"
+#include "ace/OS_NS_string.h"
+#include "ACEXML/common/HttpCharStream.h"
+#include "ACEXML/common/Encoding.h"
+
+ACE_RCSID (common, HttpCharStream, "$Id$")
+
+/* States of the finite-state machine that scans the HTTP response header. */
+enum
+{
+  /* Status line: still inside the leading protocol token. */
+  HDST_LINE1_PROTOCOL = 0,
+  /* Status line: the blank after the protocol token has been seen. */
+  HDST_LINE1_WHITESPACE = 1,
+  /* Status line: accumulating the digits of the status code. */
+  HDST_LINE1_STATUS = 2,
+  /* Beginning of a header line. */
+  HDST_BOL = 10,
+  /* Somewhere inside ordinary header text. */
+  HDST_TEXT = 11,
+  /* The previous byte was a bare LF. */
+  HDST_LF = 12,
+  /* The previous byte was a CR. */
+  HDST_CR = 13,
+  /* The previous two bytes were CR LF. */
+  HDST_CRLF = 14,
+  /* The previous three bytes were CR LF CR. */
+  HDST_CRLFCR = 15
+};
+
+// Build an unopened stream: every pointer member starts out null and the
+// size/offset bookkeeping starts at zero.  Nothing is allocated here.
+ACEXML_HttpCharStream::ACEXML_HttpCharStream (void)
+  : url_ (0)
+  , url_addr_ (0)
+  , stream_ (0)
+  , connector_ (0)
+  , size_ (0)
+  , data_offset_ (0)
+  , encoding_ (0)
+{
+}
+
+// Tear the stream down; close() frees every member this object owns and
+// is safe to call on an object that was never opened.
+ACEXML_HttpCharStream::~ACEXML_HttpCharStream (void)
+{
+  close ();
+}
+
+// Open the stream on <url>: parse the URL, connect the backing store, send
+// the HTTP request, consume the response header and detect the encoding of
+// the body.
+//
+// Returns the result of determine_encoding() (0 on success) or -1 on any
+// failure.  On a failure detected after the members were allocated, all
+// resources are released again through close().
+int
+ACEXML_HttpCharStream::open (const ACEXML_Char *url)
+{
+  if (url == 0)
+    return -1;
+
+  // Copy the URL before closing: the caller may legitimately pass a pointer
+  // obtained from getSystemId(), which close() is about to free.
+  ACEXML_Char *url_copy = ACE::strnew (url);
+
+  // Release whatever a previous open() left behind so that re-opening does
+  // not leak the old URL, address, stream and connector.
+  this->close ();
+  this->url_ = url_copy;
+
+  ACE_NEW_RETURN (this->url_addr_, ACEXML_URL_Addr, -1);
+  ACE_NEW_RETURN (this->stream_, ACEXML_Mem_Map_Stream, -1);
+
+  // In every error path below the message is logged *before* close() so
+  // that "%p" reports the errno of the call that failed, not one that
+  // close() may have overwritten.
+  if (this->url_addr_->string_to_addr (this->url_) == -1)
+    {
+      ACE_ERROR ((LM_ERROR, "%p\n", "cannot convert URL"));
+      this->close ();
+      return -1;
+    }
+
+  ACE_NEW_RETURN (this->connector_,
+                  Connector (0, ACE_NONBLOCK),
+                  -1);
+
+  if (this->stream_->open (this->connector_, *this->url_addr_) == -1)
+    {
+      ACE_ERROR ((LM_ERROR, "%p\n", "cannot open backing store"));
+      this->close ();
+      return -1;
+    }
+
+  int result = this->send_request ();
+  if (result == -1)
+    {
+      ACE_ERROR ((LM_ERROR, "%p\n", "send_request"));
+      this->close ();
+      return -1;
+    }
+
+  // get_url() returns the HTTP status code and stores the body length.
+  size_t len = 0;
+  result = this->get_url (len);
+  if (result == -1)
+    {
+      ACE_ERROR ((LM_ERROR, "%p\n", "get_url"));
+      this->close ();
+      return -1;
+    }
+  if (result != 200)
+    {
+      ACE_ERROR ((LM_ERROR, "Server returned status %d : %s\n",
+                  result,
+                  "Refer HTTP/1.0 error code for details"));
+      this->close ();
+      return -1;
+    }
+
+  this->size_ = static_cast<off_t> (len);
+  return this->determine_encoding ();
+}
+
+// The FSM was taken from the implementation of http_get and that falls
+// under the following license:
+//
+// Copyright (c) 2000 by Jef Poskanzer <jef@acme.com>. All rights reserved.
+
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+
+// THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+
+// Advance the header FSM by one input byte <c>.  <state> is updated in
+// place and <status> accumulates the decimal digits of the status code
+// found on the first line.  Returns true when <c> is the byte that ends
+// the header block (an empty line), false otherwise.
+static bool
+acexml_http_header_step (int& state, int& status, char c)
+{
+  const bool is_digit = (c >= '0' && c <= '9');
+
+  switch (state)
+    {
+    case HDST_LINE1_PROTOCOL:
+      // Any other byte is part of the protocol token: stay put.
+      if (c == ' ' || c == '\t')
+        state = HDST_LINE1_WHITESPACE;
+      else if (c == '\n')
+        state = HDST_LF;
+      else if (c == '\r')
+        state = HDST_CR;
+      return false;
+
+    case HDST_LINE1_WHITESPACE:
+      if (is_digit)
+        {
+          // First digit of the status code.
+          status = c - '0';
+          state = HDST_LINE1_STATUS;
+        }
+      else if (c == '\n')
+        state = HDST_LF;
+      else if (c == '\r')
+        state = HDST_CR;
+      else
+        state = HDST_TEXT;
+      return false;
+
+    case HDST_LINE1_STATUS:
+      if (is_digit)
+        status = status * 10 + (c - '0');
+      else if (c == '\n')
+        state = HDST_LF;
+      else if (c == '\r')
+        state = HDST_CR;
+      else
+        state = HDST_TEXT;
+      return false;
+
+    case HDST_BOL:
+      if (c == '\n')
+        state = HDST_LF;
+      else if (c == '\r')
+        state = HDST_CR;
+      else
+        state = HDST_TEXT;
+      return false;
+
+    case HDST_TEXT:
+      // Ordinary text keeps the state unchanged.
+      if (c == '\n')
+        state = HDST_LF;
+      else if (c == '\r')
+        state = HDST_CR;
+      return false;
+
+    case HDST_LF:
+      if (c == '\n')
+        return true;              // LF LF
+      state = (c == '\r') ? HDST_CR : HDST_TEXT;
+      return false;
+
+    case HDST_CR:
+      if (c == '\r')
+        return true;              // CR CR
+      state = (c == '\n') ? HDST_CRLF : HDST_TEXT;
+      return false;
+
+    case HDST_CRLF:
+      if (c == '\n')
+        return true;              // CR LF LF
+      state = (c == '\r') ? HDST_CRLFCR : HDST_TEXT;
+      return false;
+
+    case HDST_CRLFCR:
+      if (c == '\n' || c == '\r')
+        return true;              // CR LF CR LF (or CR LF CR CR)
+      state = HDST_TEXT;
+      return false;
+    }
+
+  // Unknown state: ignore the byte.
+  return false;
+}
+
+// Read the HTTP response from the backing stream, parse the header and
+// position the stream at the first byte of the body.
+//
+// On success <len> receives the number of body bytes, data_offset_ is set
+// to the offset of the body from the start of the stream, and the HTTP
+// status code parsed from the first line is returned.  Returns -1 if there
+// is no stream, if the input ends before the header block is terminated,
+// or if seeking to the body fails.
+int
+ACEXML_HttpCharStream::get_url (size_t& len)
+{
+  if (this->stream_ == 0)
+    return -1;
+
+  int header_state = HDST_LINE1_PROTOCOL;
+  int status = 0;
+  bool headers_done = false;
+  char* buf = 0;
+  size_t b = 0;
+
+  while (!headers_done)
+    {
+      // Ask for a full buffer every time; recv() shrinks <buflen> to the
+      // amount actually delivered.
+      size_t buflen = BUFSIZ;
+      buf = const_cast<char*> (this->stream_->recv (buflen));
+
+      // No more input: never touch a null or empty chunk.
+      if (buf == 0 || buflen == 0)
+        break;
+
+      for (b = 0; b < buflen; ++b)
+        if (acexml_http_header_step (header_state, status, buf[b]))
+          {
+            headers_done = true;
+            break;
+          }
+    }
+
+  // The input ended before an empty line was seen: there is no body to
+  // point at, so report failure instead of computing a bogus address.
+  if (!headers_done)
+    return -1;
+
+  // Store the address of the beginning of data (the byte after the header
+  // terminator).  We will use it to seek to the beginning of the data.
+  // NOTE(review): this relies on addresses handed out by the stream staying
+  // valid while the rest of the response is received -- confirm against
+  // ACEXML_Mem_Map_Stream.
+  char* data_beg = buf + b + 1;
+
+  // Get all of the data.  Since this is backed by file store, we won't lose
+  // any of the data.
+  for (;;)
+    {
+      size_t chunk = BUFSIZ;
+      if (this->stream_->recv (chunk) == 0)
+        break;
+    }
+
+  // Length of data in the URL: current receive position minus body start.
+  len = this->stream_->recv () - data_beg;
+
+  // Move the pointer to the beginning of the file store.
+  this->stream_->rewind ();
+
+  this->data_offset_ = data_beg - this->stream_->recv ();
+  // Forward to the beginning of data.
+  if (this->stream_->seek (this->data_offset_, SEEK_SET) == -1)
+    ACE_ERROR_RETURN ((LM_ERROR, "%s: %m",
+                       "Error in seeking to beginning of data"), -1);
+
+  return status;
+}
+
+
+// Format an HTTP/1.0 GET request for the path and host held in url_addr_
+// and send it over the stream.  Returns the value reported by send_n()
+// when it is positive, -1 otherwise.
+int
+ACEXML_HttpCharStream::send_request (void)
+{
+  // Narrow-character copy of the path; the guard frees it on scope exit.
+  char* request_path =
+    ACE::strnew (ACE_TEXT_ALWAYS_CHAR (this->url_addr_->get_path_name()));
+  ACE_Auto_Basic_Array_Ptr<char> path_guard (request_path);
+
+  // Fixed overhead on top of the two variable-length parts:
+  // extra (20) + NUL byte (1) + protocol filler (16).
+  const size_t overhead = 20 + 1 + 16;
+  const size_t capacity = ACE_OS::strlen (request_path)
+    + ACE_OS::strlen (this->url_addr_->get_host_name ())
+    + overhead;
+
+  char* request;
+  ACE_NEW_RETURN (request, char[capacity], -1);
+
+  // Ensure that the request buffer is deallocated.
+  ACE_Auto_Basic_Array_Ptr<char> request_guard (request);
+
+  // Request line, Host header, then the empty line ending the header.
+  int used = ACE_OS::sprintf (request, "GET %s HTTP/1.0\r\n", request_path);
+  used += ACE_OS::sprintf (request + used, "Host: %s\r\n",
+                           this->url_addr_->get_host_name ());
+  used += ACE_OS::sprintf (request + used, "\r\n");
+
+  ACE_Time_Value timeout (ACE_DEFAULT_TIMEOUT);
+
+  // Send the command to the connected server.
+  const int sent = this->stream_->send_n (request, used, &timeout);
+  return (sent > 0) ? sent : -1;
+}
+
+
+// Report what the underlying stream's available() returns, narrowed to
+// int; -1 when no stream is open.
+int
+ACEXML_HttpCharStream::available (void)
+{
+  if (this->stream_ != 0)
+    return static_cast<int> (this->stream_->available ());
+  return -1;
+}
+
+// Release everything the stream owns and return the object to its
+// freshly-constructed state.  Safe to call repeatedly: every pointer is
+// nulled after it is freed.  Always returns 0.
+int
+ACEXML_HttpCharStream::close (void)
+{
+  // Heap-allocated members, freed in this fixed order.
+  delete[] url_;
+  url_ = 0;
+  delete url_addr_;
+  url_addr_ = 0;
+  delete stream_;
+  stream_ = 0;
+  delete connector_;
+  connector_ = 0;
+
+  // Plain bookkeeping values.
+  size_ = 0;
+  data_offset_ = 0;
+
+  // Encoding name copied by determine_encoding().
+  delete[] encoding_;
+  encoding_ = 0;
+
+  return 0;
+}
+
+// Peek at the first four bytes at the current stream position, ask
+// ACEXML_Encoding::get_encoding() which encoding they indicate, store a
+// copy of that name in encoding_, and consume a leading byte-order-mark if
+// one is present.
+//
+// Returns 0 on success, -1 if there is no stream or the encoding cannot be
+// determined.
+int
+ACEXML_HttpCharStream::determine_encoding (void)
+{
+  if (this->stream_ == 0)
+    return -1;
+
+  // Look ahead without consuming anything.
+  // NOTE(review): end-of-input is not detected here.  Comparing the peeked
+  // byte against (char)-1 cannot tell EOF apart from a legitimate 0xFF
+  // byte (the first byte of a UTF-16LE byte-order-mark), so all four
+  // positions are read unconditionally.
+  char input[4] = {0, 0, 0, 0};
+  for (int i = 0; i < 4; ++i)
+    input[i] = static_cast<char> (this->stream_->peek_char (i));
+
+  const ACEXML_Char* temp = ACEXML_Encoding::get_encoding (input);
+  if (!temp)
+    return -1;
+
+  // Replace any previously detected encoding name.
+  delete [] this->encoding_;
+  this->encoding_ = ACE::strnew (temp);
+  // ACE_DEBUG ((LM_DEBUG, "URI's encoding is %s\n", this->encoding_));
+
+  // Move over the byte-order-mark if present.  At most three leading bytes
+  // are examined, and the scan stops at the first byte that is not a BOM
+  // byte.  Index with the loop variable <j>: the bytes of interest are
+  // input[0..2].
+  for (int j = 0; j < 3; ++j)
+    {
+      if (input[j] == '\xFF' || input[j] == '\xFE' || input[j] == '\xEF' ||
+          input[j] == '\xBB' || input[j] == '\xBF')
+        {
+          this->stream_->get_char ();
+          continue;
+        }
+      break;
+    }
+  return 0;
+}
+
+// Reposition the stream at the start of the response body and re-run
+// encoding detection.  Does nothing when no stream is open.
+void
+ACEXML_HttpCharStream::rewind (void)
+{
+  if (this->stream_ != 0)
+    {
+      this->stream_->rewind ();
+
+      // Skip forward over the header to the beginning of the data; a
+      // failed seek is only logged.
+      const bool seek_failed =
+        (this->stream_->seek (this->data_offset_, SEEK_SET) == -1);
+      if (seek_failed)
+        ACE_ERROR ((LM_ERROR, "%s: %m", "Error in seeking to beginning of data"));
+
+      this->determine_encoding ();
+    }
+}
+
+// Name of the encoding stored by determine_encoding(); null until one has
+// been detected.  The string stays owned by this object.
+const ACEXML_Char*
+ACEXML_HttpCharStream::getEncoding (void)
+{
+  return encoding_;
+}
+
+// The URL this stream was opened on; null when the stream is not open.
+// The string stays owned by this object.
+const ACEXML_Char*
+ACEXML_HttpCharStream::getSystemId (void)
+{
+  return url_;
+}
+
+
+// Read up to <len> characters' worth of bytes from the stream into <str>.
+//
+// <str> must have room for <len> ACEXML_Char elements.  Returns the number
+// of bytes copied, or -1 if there is no stream, <str> is null, or the
+// stream has nothing to deliver.
+// NOTE(review): the count returned is in bytes (len * sizeof (ACEXML_Char)
+// is what is requested), not in characters -- confirm what callers expect
+// in wide-character builds.
+int
+ACEXML_HttpCharStream::read (ACEXML_Char *str,
+                             size_t len)
+{
+  if (this->stream_ == 0 || str == 0)
+    return -1;
+
+  const size_t capacity = len * sizeof (ACEXML_Char);
+  size_t received = capacity;
+  const char* temp = this->stream_->recv (received);
+  if (temp == 0)
+    return -1;
+
+  // Never write past the caller's buffer, whatever recv() reported.
+  if (received > capacity)
+    received = capacity;
+
+  // Copy the data out.  Assigning to <str> itself would only change the
+  // local parameter and leave the caller's buffer untouched.
+  ACE_OS::memcpy (str, temp, received);
+  return static_cast<int> (received);
+}
+
+
+// Fetch the next character into <ch>.  Returns 0 on success and -1 when
+// no stream is open or the value read equals EOF.  Wide-character builds
+// delegate to get_i().
+int
+ACEXML_HttpCharStream::get (ACEXML_Char& ch)
+{
+  if (this->stream_ == 0)
+    return -1;
+#if defined (ACE_USES_WCHAR)
+  return this->get_i (ch);
+#else
+  ch = static_cast<ACEXML_Char> (this->stream_->get_char ());
+  if (ch == static_cast<ACEXML_Char> (EOF))
+    return -1;
+  return 0;
+#endif /* ACE_USES_WCHAR */
+}
+
+// Look at the next character without consuming it.  Returns -1 when no
+// stream is open; otherwise the value produced by the stream's
+// peek_char(0), or by peek_i() in wide-character builds.
+int
+ACEXML_HttpCharStream::peek (void)
+{
+  if (this->stream_ != 0)
+    {
+#if defined (ACE_USES_WCHAR)
+      return this->peek_i ();
+#else
+      return this->stream_->peek_char (0);
+#endif /* ACE_USES_WCHAR */
+    }
+  return -1;
+}
+
+
+#if defined (ACE_USES_WCHAR)
+// Wide-character implementation of get(): read one character in the
+// detected encoding into <ch>.
+//
+// For UTF-8 a single byte is returned as-is.  Otherwise two bytes are read
+// and combined as UTF-16, big-endian when the encoding is "UTF-16BE" and
+// little-endian for anything else.  Returns 0 on success; on end of input
+// (or when no encoding has been determined) <ch> is set to 0 and -1 is
+// returned.
+int
+ACEXML_HttpCharStream::get_i (ACEXML_Char& ch)
+{
+  // Without a detected encoding there is nothing to compare against.
+  if (this->encoding_ == 0)
+    {
+      ch = 0;
+      return -1;
+    }
+
+  if (ACE_OS::strcmp (this->encoding_, ACE_TEXT ("UTF-8")) == 0)
+    {
+      ch = (ACEXML_Char) this->stream_->get_char();
+      return (ch == (ACEXML_Char)EOF ? -1 : 0);
+    }
+
+  const bool big_endian =
+    (ACE_OS::strcmp (this->encoding_, ACE_TEXT ("UTF-16BE")) == 0);
+
+  // Read both halves of the code unit.  Only EOF ends the read: a zero
+  // byte is ordinary data in UTF-16 (every ASCII character has one), and
+  // bytes with the high bit set are masked so that they are never sign
+  // extended into the result.
+  // NOTE(review): if get_char() hands back a sign-extended char, a 0xFF
+  // data byte is indistinguishable from EOF here -- confirm against
+  // ACEXML_Mem_Map_Stream.
+  int octet[2] = {0, 0};
+  for (int i = 0; i < 2; ++i)
+    {
+      const int c = this->stream_->get_char ();
+      if (c == EOF)
+        {
+          ch = 0;
+          return -1;
+        }
+      octet[i] = c & 0xFF;
+    }
+
+  ch = static_cast<ACEXML_Char> (big_endian
+                                 ? (octet[0] << 8) | octet[1]
+                                 : (octet[1] << 8) | octet[0]);
+  return 0;
+}
+
+// Wide-character implementation of peek(): return the next character in
+// the detected encoding without consuming it.
+//
+// For UTF-8 the next byte is returned.  Otherwise the next two bytes are
+// combined as UTF-16, big-endian when the encoding is "UTF-16BE" and
+// little-endian for anything else.  Returns -1 when either byte is
+// unavailable or when no encoding has been determined.
+int
+ACEXML_HttpCharStream::peek_i (void)
+{
+  // Without a detected encoding there is nothing to compare against.
+  if (this->encoding_ == 0)
+    return -1;
+
+  // If we are reading a UTF-8 encoded file, a plain single-byte peek is
+  // all that is needed.
+  if (ACE_OS::strcmp (this->encoding_, ACE_TEXT ("UTF-8")) == 0)
+    {
+      ACEXML_Char ch = (ACEXML_Char) this->stream_->peek_char (0);
+      return ch;
+    }
+
+  const bool big_endian =
+    (ACE_OS::strcmp (this->encoding_, ACE_TEXT ("UTF-16BE")) == 0);
+
+  // Peek into the stream.  Only EOF means "not available": a zero byte is
+  // ordinary data in UTF-16, and high-bit bytes are masked so they are
+  // never sign extended into the result.
+  // NOTE(review): if peek_char() hands back a sign-extended char, a 0xFF
+  // data byte is indistinguishable from EOF here -- confirm against
+  // ACEXML_Mem_Map_Stream.
+  int octet[2] = {0, 0};
+  for (int i = 0; i < 2; ++i)
+    {
+      const int c = this->stream_->peek_char (i);
+      if (c == EOF)
+        return -1;
+      octet[i] = c & 0xFF;
+    }
+
+  return big_endian ? (octet[0] << 8) | octet[1]
+                    : (octet[1] << 8) | octet[0];
+}
+#endif /* ACE_USES_WCHAR */