summaryrefslogtreecommitdiff
path: root/src/assistant/3rdparty/clucene/src/CLucene/document
diff options
context:
space:
mode:
Diffstat (limited to 'src/assistant/3rdparty/clucene/src/CLucene/document')
-rw-r--r--src/assistant/3rdparty/clucene/src/CLucene/document/DateField.cpp60
-rw-r--r--src/assistant/3rdparty/clucene/src/CLucene/document/DateField.h64
-rw-r--r--src/assistant/3rdparty/clucene/src/CLucene/document/Document.cpp237
-rw-r--r--src/assistant/3rdparty/clucene/src/CLucene/document/Document.h158
-rw-r--r--src/assistant/3rdparty/clucene/src/CLucene/document/Field.cpp315
-rw-r--r--src/assistant/3rdparty/clucene/src/CLucene/document/Field.h261
6 files changed, 1095 insertions, 0 deletions
diff --git a/src/assistant/3rdparty/clucene/src/CLucene/document/DateField.cpp b/src/assistant/3rdparty/clucene/src/CLucene/document/DateField.cpp
new file mode 100644
index 000000000..ff72b12bb
--- /dev/null
+++ b/src/assistant/3rdparty/clucene/src/CLucene/document/DateField.cpp
@@ -0,0 +1,60 @@
+/*------------------------------------------------------------------------------
+* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
+*
+* Distributable under the terms of either the Apache License (Version 2.0) or
+* the GNU Lesser General Public License, as specified in the COPYING file.
+------------------------------------------------------------------------------*/
+#include "CLucene/StdHeader.h"
+
+#include "DateField.h"
+#include "CLucene/util/Misc.h"
+CL_NS_USE(util)
+CL_NS_DEF(document)
+
+// Destructor: intentionally empty, there is nothing to release here.
+DateField::~DateField()
+{
+}
+
+/**
+* Allocating overload: converts a millisecond time into a freshly allocated,
+* NUL-terminated string of DATEFIELD_DATE_LEN characters.
+*
+* The conversion is done into a stack buffer first. The other overload
+* rejects out-of-range times via _CLTHROWA; performing it before the heap
+* allocation means nothing is leaked when that happens.
+*
+* @param time milliseconds; must be within [0, DATEFIELD_DATE_MAX]
+* @return array obtained from _CL_NEWARRAY; it is not released by this class
+*/
+TCHAR* DateField::timeToString(const int64_t time) {
+ TCHAR converted[DATEFIELD_DATE_LEN + 1];
+ timeToString(time, converted); // may throw; nothing has been allocated yet
+
+ TCHAR* buf = _CL_NEWARRAY(TCHAR,DATEFIELD_DATE_LEN + 1);
+ // Copy all characters including the terminating NUL.
+ for (int32_t pos = 0; pos <= DATEFIELD_DATE_LEN; pos++)
+  buf[pos] = converted[pos];
+ return buf;
+}
+/**
+* Writes `time` into `buf` as a base-36 number, left-padded with '0' to exactly
+* DATEFIELD_DATE_LEN characters and NUL-terminated.
+*
+* @param time milliseconds; negative values and values above
+*             DATEFIELD_DATE_MAX are rejected with CL_ERR_IllegalArgument
+* @param buf  destination with room for DATEFIELD_DATE_LEN + 1 characters;
+*             it is set to the empty string before any range check
+*/
+void DateField::timeToString(const int64_t time, TCHAR* buf) {
+ CND_PRECONDITION (buf, "buf == NULL");
+ *buf = '\0'; // callers see an empty string if one of the checks below fires
+ if (time < 0)
+  _CLTHROWA (CL_ERR_IllegalArgument,"time too early"); //todo: make richer error
+
+ if (time > DATEFIELD_DATE_MAX)
+  _CLTHROWA (CL_ERR_IllegalArgument, "time too late (past DATEFIELD_DATE_MAX)"); //todo: make richer error
+
+ _i64tot(time, buf, 36);
+ const int32_t bufLen = static_cast<int32_t>(_tcslen(buf));
+
+ CND_PRECONDITION (bufLen <= DATEFIELD_DATE_LEN, "timeToString length is greater than DATEFIELD_DATE_LEN");
+
+ /* Supply leading zeroes if necessary. */
+ const int32_t nMissingZeroes = DATEFIELD_DATE_LEN - bufLen;
+ if (nMissingZeroes > 0) {
+  /* Shift the digits and their terminating NUL to the right. Source and
+     destination overlap, so walk from the end towards the start. */
+  for (int32_t src = bufLen; src >= 0; src--)
+   buf[src + nMissingZeroes] = buf[src];
+
+  /* Fill the vacated prefix with zeroes. */
+  for (int32_t dst = 0; dst < nMissingZeroes; dst++)
+   buf[dst] = '0';
+ }
+
+ CND_PRECONDITION (_tcslen(buf) == DATEFIELD_DATE_LEN, "timeToString return is not equal to DATEFIELD_DATE_LEN");
+}
+
+/** Parses a string produced by timeToString() back into milliseconds (radix 36). */
+int64_t DateField::stringToTime(const TCHAR* time) {
+ TCHAR* parseEnd; // required by the conversion call; not inspected afterwards
+ const int64_t millis = _tcstoi64(time, &parseEnd, 36);
+ return millis;
+}
+
+CL_NS_END
diff --git a/src/assistant/3rdparty/clucene/src/CLucene/document/DateField.h b/src/assistant/3rdparty/clucene/src/CLucene/document/DateField.h
new file mode 100644
index 000000000..712fe9b62
--- /dev/null
+++ b/src/assistant/3rdparty/clucene/src/CLucene/document/DateField.h
@@ -0,0 +1,64 @@
+/*------------------------------------------------------------------------------
+* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
+*
+* Distributable under the terms of either the Apache License (Version 2.0) or
+* the GNU Lesser General Public License, as specified in the COPYING file.
+------------------------------------------------------------------------------*/
+#ifndef _lucene_document_DateField_
+#define _lucene_document_DateField_
+
+#if defined(_LUCENE_PRAGMA_ONCE)
+# pragma once
+#endif
+
+CL_NS_DEF(document)
+
+//here are some constants used throughout clucene
+//make date strings long enough to last a millennium
+#define DATEFIELD_DATE_MAX _ILONGLONG(31536000000000) //1000L*365*24*60*60*1000
+
+#define DATEFIELD_DATE_LEN 9 ////Long.toString(DATEFIELD_DATE_MAX, Character.MAX_RADIX).length()
+
+/**
+* Provides support for converting dates to strings and vice-versa.
+* The strings are structured so that lexicographic sorting orders by date,
+* which makes them suitable for use as field values and search terms.
+*
+* <P>Note that this class saves dates with millisecond granularity,
+* which is bad for {@link RangeQuery} and {@link PrefixQuery}, as those
+* queries are expanded to a BooleanQuery with a potentially large number
+* of terms when searching. Thus you might want to use
+* {@link DateTools} instead.
+*
+* <P>
+* Note: dates before 1970 cannot be used, and therefore cannot be
+* indexed when using this class. See {@link DateTools} for an
+* alternative without such a limitation.
+*
+* @deprecated If you build a new index, use {@link DateTools} instead. This class is included for use with existing
+* indices and will be removed in a future release.
+*/
+class DateField :LUCENE_BASE {
+public:
+ ~DateField(); // all conversion functions below are static
+
+ /**
+ * Converts a millisecond time to a newly allocated, zero-padded base-36 string of DATEFIELD_DATE_LEN characters.
+ * @throws CL_ERR_IllegalArgument if the time specified in the
+ * method argument is negative (that is, before 1970) or greater than DATEFIELD_DATE_MAX
+ */
+ static TCHAR* timeToString(const int64_t time);
+
+ /**
+ * Converts a millisecond time to a zero-padded base-36 string of DATEFIELD_DATE_LEN characters, written into str.
+ * @throws CL_ERR_IllegalArgument if the time specified in the
+ * method argument is negative (that is, before 1970) or greater than DATEFIELD_DATE_MAX
+ * @param str must be a character array DATEFIELD_DATE_LEN+1 or longer
+ */
+ static void timeToString(const int64_t time, TCHAR* str);
+
+ /** Converts a string-encoded date into a millisecond time by parsing it as a base-36 integer. */
+ static int64_t stringToTime(const TCHAR* s);
+};
+CL_NS_END
+#endif
diff --git a/src/assistant/3rdparty/clucene/src/CLucene/document/Document.cpp b/src/assistant/3rdparty/clucene/src/CLucene/document/Document.cpp
new file mode 100644
index 000000000..a0ce03942
--- /dev/null
+++ b/src/assistant/3rdparty/clucene/src/CLucene/document/Document.cpp
@@ -0,0 +1,237 @@
+/*------------------------------------------------------------------------------
+* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
+*
+* Distributable under the terms of either the Apache License (Version 2.0) or
+* the GNU Lesser General Public License, as specified in the COPYING file.
+------------------------------------------------------------------------------*/
+#include "CLucene/StdHeader.h"
+#include "Document.h"
+#include "Field.h"
+#include "CLucene/util/StringBuffer.h"
+
+CL_NS_USE(util)
+CL_NS_DEF(document)
+
+ // Builds one list node that holds field `f` and links to the existing
+ // chain `n` (which may be NULL for the last node).
+ DocumentFieldEnumeration::DocumentFieldList::DocumentFieldList(Field* f, DocumentFieldList* n ) {
+  CND_PRECONDITION(f != NULL, "f is NULL");
+
+  this->next = n;
+  this->field = f;
+ }
+ // Destroys this node, its field, and every node that follows it.
+ DocumentFieldEnumeration::DocumentFieldList::~DocumentFieldList(){
+  // A node without a field has nothing left to release.
+  if (field == NULL)
+   return;
+
+  // Walk the tail with a loop rather than letting each node's destructor
+  // delete its successor: recursion could exhaust the stack on documents
+  // with several thousand fields.
+  for (DocumentFieldList* node = next; node != NULL; ) {
+   DocumentFieldList* following = node->next;
+   node->next = NULL; // detach first so deleting `node` frees that node only
+
+   _CLDELETE(node);
+   node = following;
+  }
+  _CLDELETE(field);
+ }
+
+
+ // Starts an enumeration at list node `fl`; NULL yields an empty enumeration.
+ DocumentFieldEnumeration::DocumentFieldEnumeration(const DocumentFieldList* fl)
+  : fields(fl)
+ {
+ }
+
+ // Destructor: empty, the list being enumerated is left untouched.
+ DocumentFieldEnumeration::~DocumentFieldEnumeration()
+ {
+ }
+
+ // True while the cursor still points at a list node.
+ bool DocumentFieldEnumeration::hasMoreElements() const {
+  return fields != NULL;
+ }
+
+ // Returns the field at the cursor and advances the cursor by one node,
+ // or returns NULL once the list is exhausted.
+ Field* DocumentFieldEnumeration::nextElement() {
+  if (fields == NULL)
+   return NULL;
+
+  Field* current = fields->field;
+  fields = fields->next;
+  return current;
+ }
+
+ /** Constructs a new document with no fields. */
+ /** Creates an empty document: no fields, boost factor 1.0. */
+ Document::Document()
+  : fieldList(NULL),
+    boost(1.0f)
+ {
+ }
+
+ /** Releases the field list; deleting the head node takes the rest of the chain with it. */
+ Document::~Document()
+ {
+  boost = 1.0f;
+  _CLDELETE(fieldList);
+ }
+
+ /** Drops every field so the document can be refilled; the boost is left as it is. */
+ void Document::clear()
+ {
+  _CLDELETE(fieldList);
+ }
+
+ /**
+ * Puts `field` at the head of the field list. The list node stores the
+ * address of `field` and deletes it when the node itself is destroyed.
+ */
+ void Document::add(Field& field) {
+  DocumentFieldEnumeration::DocumentFieldList* head =
+   _CLNEW DocumentFieldEnumeration::DocumentFieldList(&field, fieldList);
+  fieldList = head;
+ }
+
+ /** Stores the document-level boost factor. */
+ void Document::setBoost(qreal boost)
+ {
+  (*this).boost = boost; // the parameter shadows the member of the same name
+ }
+
+ /** Returns the document-level boost factor (1.0 unless setBoost() was called). */
+ qreal Document::getBoost() const
+ {
+  return this->boost;
+ }
+
+
+ /**
+ * Scans the field list from its head and returns the first field whose
+ * name equals `name` (case-sensitive), or NULL when there is none.
+ */
+ Field* Document::getField(const TCHAR* name) const{
+  CND_PRECONDITION(name != NULL, "name is NULL");
+
+  DocumentFieldEnumeration::DocumentFieldList* node = fieldList;
+  while (node != NULL) {
+   // compare the text: `name` is probably not interned, so pointer
+   // comparison cannot be used
+   if ( _tcscmp(node->field->name(), name) == 0 )
+    return node->field;
+   node = node->next;
+  }
+
+  return NULL;
+ }
+
+ /**
+ * Returns the string value of the field found by getField(field), or NULL
+ * when no such field exists. The value itself is NULL for fields that
+ * carry a reader or a binary stream instead of a string.
+ */
+ const TCHAR* Document::get(const TCHAR* field) const {
+  CND_PRECONDITION(field != NULL, "field is NULL");
+  Field* match = getField(field);
+  return (match == NULL) ? NULL : match->stringValue();
+ }
+
+ /** Returns a new enumeration positioned at the head of the field list; allocated with _CLNEW. */
+ DocumentFieldEnumeration* Document::fields() const {
+  DocumentFieldEnumeration* cursor = _CLNEW DocumentFieldEnumeration(fieldList);
+  return cursor;
+ }
+
+
+ /**
+ * Renders the document as "Document<f1 f2 ...>", where each fN is the
+ * toString() of one field, in list order, separated by single spaces.
+ */
+ TCHAR* Document::toString() const {
+  StringBuffer ret(_T("Document<"));
+  DocumentFieldEnumeration::DocumentFieldList* node = fieldList;
+  while (node != NULL) {
+   TCHAR* fieldText = node->field->toString();
+   ret.append( fieldText );
+   _CLDELETE_ARRAY( fieldText );
+
+   node = node->next;
+   if (node != NULL)
+    ret.append(_T(" ")); // separator only between fields, not after the last
+  }
+  ret.append(_T(">"));
+  return ret.toString();
+ }
+
+
+
+ /**
+ * Removes the first field, counted from the head of the list, whose name
+ * equals `name`. Does nothing when no field matches.
+ */
+ void Document::removeField(const TCHAR* name) {
+  CND_PRECONDITION(name != NULL, "name is NULL");
+
+  // `link` addresses the pointer that leads to the node under inspection:
+  // either fieldList itself or the `next` member of the preceding node.
+  for (DocumentFieldEnumeration::DocumentFieldList** link = &fieldList;
+       *link != NULL;
+       link = &(*link)->next) {
+   DocumentFieldEnumeration::DocumentFieldList* node = *link;
+   //cannot use interning here, because name is probably not interned
+   if ( _tcscmp(node->field->name(),name) != 0 )
+    continue;
+
+   *link = node->next;  // unlink
+   node->next = NULL;   // keep the node's destructor away from the rest of the list
+   _CLDELETE(node);
+   return;
+  }
+ }
+
+ /** Removes every field whose name equals `name`; other fields keep their order. */
+ void Document::removeFields(const TCHAR* name) {
+  CND_PRECONDITION(name != NULL, "name is NULL");
+
+  // `link` addresses the pointer that leads to the node under inspection:
+  // either fieldList itself or the `next` member of the preceding node.
+  DocumentFieldEnumeration::DocumentFieldList** link = &fieldList;
+  while (*link != NULL) {
+   DocumentFieldEnumeration::DocumentFieldList* node = *link;
+   //cannot use interning here, because name is probably not interned
+   if ( _tcscmp(node->field->name(),name) == 0 ){
+    *link = node->next;  // unlink; `link` now leads to the successor
+    node->next = NULL;   // keep the node's destructor away from the rest of the list
+    _CLDELETE(node);
+   }else{
+    link = &node->next;
+   }
+  }
+ }
+
+ /**
+ * Collects copies of the string values of all fields named `name`.
+ * Fields whose stringValue() is NULL are skipped.
+ *
+ * The field list is walked directly, once to count and once to copy, so no
+ * temporary enumeration objects are allocated.
+ *
+ * @param name field name to look for (case-sensitive); must not be NULL
+ * @return NULL when nothing matches; otherwise a NULL-terminated array
+ *         allocated with _CL_NEWARRAY whose entries come from stringDuplicate()
+ */
+ TCHAR** Document::getValues(const TCHAR* name) {
+  CND_PRECONDITION(name != NULL, "name is NULL");
+
+  DocumentFieldEnumeration::DocumentFieldList* node;
+
+  // Pass 1: count the matches so the array can be sized exactly.
+  int32_t count = 0;
+  for (node = fieldList; node != NULL; node = node->next) {
+   //cannot use interning here, because name is probably not interned
+   if ( _tcscmp(node->field->name(),name) == 0 && node->field->stringValue() != NULL )
+    count++;
+  }
+  if ( count == 0 )
+   return NULL;
+
+  // Pass 2: duplicate the values in list order.
+  TCHAR** ret = _CL_NEWARRAY(TCHAR*,count+1);
+  int32_t i = 0;
+  for (node = fieldList; node != NULL; node = node->next) {
+   if ( _tcscmp(node->field->name(),name) == 0 && node->field->stringValue() != NULL ){
+    ret[i] = stringDuplicate(node->field->stringValue());
+    i++;
+   }
+  }
+  ret[count] = NULL;
+  return ret;
+ }
+CL_NS_END
diff --git a/src/assistant/3rdparty/clucene/src/CLucene/document/Document.h b/src/assistant/3rdparty/clucene/src/CLucene/document/Document.h
new file mode 100644
index 000000000..ba7a283f7
--- /dev/null
+++ b/src/assistant/3rdparty/clucene/src/CLucene/document/Document.h
@@ -0,0 +1,158 @@
+/*------------------------------------------------------------------------------
+* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
+*
+* Distributable under the terms of either the Apache License (Version 2.0) or
+* the GNU Lesser General Public License, as specified in the COPYING file.
+------------------------------------------------------------------------------*/
+#ifndef _lucene_document_Document_
+#define _lucene_document_Document_
+
+#if defined(_LUCENE_PRAGMA_ONCE)
+# pragma once
+#endif
+
+#include "Field.h"
+
+///todo: jlucene has change from using DocumentFieldList/Enumeration
+///to using a java List... do we want to do this too?
+CL_NS_DEF(document)
+
+class Document; //predefine
+class DocumentFieldEnumeration :LUCENE_BASE{ // forward-only cursor over a document's field list
+ class DocumentFieldList :LUCENE_BASE{ // singly linked list node; declared in the private part of the class
+ public:
+ DocumentFieldList(Field* f, DocumentFieldList* n); // f must not be NULL; n may be NULL
+ ~DocumentFieldList(); // deletes field and all following nodes
+ Field* field; // field held by this node
+ DocumentFieldList* next; // following node, or NULL at the end of the list
+ };
+ friend class Document; // Document builds and edits the list directly
+private:
+ const DocumentFieldList* fields; // node returned by the next call to nextElement(); NULL when exhausted
+public:
+ DocumentFieldEnumeration(const DocumentFieldList* fl); // fl may be NULL (empty enumeration)
+ ~DocumentFieldEnumeration(); // does not delete the list
+ bool hasMoreElements() const; // true while nextElement() would return a field
+ Field* nextElement(); // returns the current field and advances; NULL when exhausted
+};
+
+/** Documents are the unit of indexing and search.
+*
+* A Document is a set of fields. Each field has a name and a textual value.
+* A field may be {@link Field#isStored() stored} with the document, in which
+* case it is returned with search hits on the document. Thus each document
+* should typically contain one or more stored fields which uniquely identify
+* it.
+*
+* <p>Note that fields which are <i>not</i> {@link Field#isStored() stored} are
+* <i>not</i> available in documents retrieved from the index, e.g. with {@link
+* Hits#doc(int32_t, Document*)}, {@link Searcher#doc(int32_t, Document*)} or {@link
+* IndexReader#document(int32_t, Document*)}.
+*/
+class Document:LUCENE_BASE {
+private:
+ DocumentFieldEnumeration::DocumentFieldList* fieldList; // head of the singly linked field list; NULL when empty
+ qreal boost; // document-level boost factor, 1.0 by default
+public:
+ Document();
+ ~Document();
+
+ /**
+ * <p>Adds a field to a document. Several fields may be added with
+ * the same name. In this case, if the fields are indexed, their text is
+ * treated as though appended for the purposes of search.</p>
+ * <p> Note that add like the removeField(s) methods only makes sense
+ * prior to adding a document to an index. These methods cannot
+ * be used to change the content of an existing index! In order to achieve this,
+ * a document has to be deleted from an index and a new changed version of that
+ * document has to be added.</p>
+ * The document stores the address of the field and deletes it when the field is removed or the document is cleared or destroyed.
+ */
+ void add(Field& field);
+ /** Returns a field with the given name if any exist in this document, or
+ * null. If multiple fields exists with this name, this method returns the
+ * first match found when scanning the internal list from its head.
+ * NOTE(review): add() inserts at the head of the list, so this appears to be the most recently added field rather than the first one added - confirm. Note: name is case sensitive
+ */
+ Field* getField(const TCHAR* name) const;
+
+ /** Returns the string value of the field with the given name if any exist in
+ * this document, or null. If multiple fields exist with this name, this
+ * method returns the value of the field that getField() finds. If that field
+ * holds a reader or a binary stream instead of a string, returns null.
+ * Note: name is case sensitive
+ */
+ const TCHAR* get(const TCHAR* field) const;
+
+ /** Returns an Enumeration of all the fields in a document. The enumeration is newly allocated on every call. */
+ DocumentFieldEnumeration* fields() const;
+ /** Prints the fields of a document for human consumption. The returned string is newly allocated. */
+ TCHAR* toString() const;
+
+ /** Sets a boost factor for hits on any field of this document. This value
+ * will be multiplied into the score of all hits on this document.
+ *
+ * <p>Values are multiplied into the value of {@link Field#getBoost()} of
+ * each field in this document. Thus, this method in effect sets a default
+ * boost for the fields of this document.
+ *
+ * @see Field#setBoost(qreal)
+ */
+ void setBoost(qreal boost);
+
+ /** Returns the boost factor for hits on any field of this document.
+ *
+ * <p>The default value is 1.0.
+ *
+ * <p>Note: This value is not stored directly with the document in the index.
+ * Documents returned from {@link IndexReader#document(int32_t, Document*)} and
+ * {@link Hits#doc(int32_t, Document*)} may thus not have the same value present as when
+ * this document was indexed.
+ *
+ * @see #setBoost(qreal)
+ */
+ qreal getBoost() const;
+
+
+ /**
+ * <p>Removes field with the specified name from the document.
+ * If multiple fields exist with this name, this method removes the first match found when scanning the internal list from its head (NOTE(review): add() inserts at the head, so this appears to be the most recently added field - confirm).
+ * If there is no field with the specified name, the document remains unchanged.</p>
+ * <p> Note that the removeField(s) methods like the add method only make sense
+ * prior to adding a document to an index. These methods cannot
+ * be used to change the content of an existing index! In order to achieve this,
+ * a document has to be deleted from an index and a new changed version of that
+ * document has to be added.</p>
+ * Note: name is case sensitive
+ */
+ void removeField(const TCHAR* name);
+
+ /**
+ * <p>Removes all fields with the given name from the document.
+ * If there is no field with the specified name, the document remains unchanged.</p>
+ * <p> Note that the removeField(s) methods like the add method only make sense
+ * prior to adding a document to an index. These methods cannot
+ * be used to change the content of an existing index! In order to achieve this,
+ * a document has to be deleted from an index and a new changed version of that
+ * document has to be added.</p>
+ * Note: name is case sensitive
+ */
+ void removeFields(const TCHAR* name);
+
+ /**
+ * Returns an array of values of the field specified as the method parameter.
+ * This method returns <code>null</code> when no field with a string value matches.
+ * Note: name is case sensitive
+ *
+ * @param name the name of the field
+ * @return a newly allocated, NULL-terminated array holding copies of the matching string values
+ */
+ TCHAR** getValues(const TCHAR* name);
+
+ /**
+ * Empties out the document so that it can be reused; the boost factor is not reset
+ */
+ void clear();
+};
+CL_NS_END
+#endif
diff --git a/src/assistant/3rdparty/clucene/src/CLucene/document/Field.cpp b/src/assistant/3rdparty/clucene/src/CLucene/document/Field.cpp
new file mode 100644
index 000000000..8cd88a36b
--- /dev/null
+++ b/src/assistant/3rdparty/clucene/src/CLucene/document/Field.cpp
@@ -0,0 +1,315 @@
+/*------------------------------------------------------------------------------
+* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
+*
+* Distributable under the terms of either the Apache License (Version 2.0) or
+* the GNU Lesser General Public License, as specified in the COPYING file.
+------------------------------------------------------------------------------*/
+#include "CLucene/StdHeader.h"
+#include "CLucene/util/Reader.h"
+#include "Field.h"
+#include "CLucene/util/Misc.h"
+#include "CLucene/util/StringIntern.h"
+#include "CLucene/util/StringBuffer.h"
+
+CL_NS_USE(util)
+CL_NS_DEF(document)
+
+Field::Field(const TCHAR* Name, const TCHAR* String, bool store, bool index, bool token, const bool storeTermVector)
+{
+//Func - Constructor taking the legacy boolean flags
+//Pre  - Name != NULL and contains the name of the field
+//       String != NULL and contains the value of the field (a copy is kept)
+//       store indicates if the field must be stored
+//       index indicates if the field must be indexed
+//       token indicates if the field must be tokenized
+//       storeTermVector must be false; true is rejected with CL_ERR_IllegalArgument
+//Post - The instance has been created
+
+ CND_PRECONDITION(Name != NULL, "Name is NULL");
+ CND_PRECONDITION(String != NULL,"String is NULL");
+ CND_PRECONDITION(!(!index && storeTermVector),"cannot store a term vector for fields that are not indexed.");
+
+ // Validate everything before acquiring resources. The destructor does not
+ // run for an object whose constructor exits with an exception, so failing
+ // after the name is interned and the value is duplicated would leak both.
+ if ( storeTermVector )
+  _CLTHROWA(CL_ERR_IllegalArgument,"Stored term vector is deprecated with using this constructor");
+
+ int cfg = 0;
+ if ( store )
+  cfg |= STORE_YES;
+ if ( index && token )
+  cfg |= INDEX_TOKENIZED;
+ else if ( index && !token )
+  cfg |= INDEX_UNTOKENIZED;
+
+ // Rejects invalid flag combinations; it only writes the `config` member.
+ setConfig(cfg);
+
+ _name = CLStringIntern::intern( Name CL_FILELINE);
+ _stringValue = stringDuplicate( String );
+ _readerValue = NULL;
+ _streamValue = NULL;
+ boost=1.0f;
+ omitNorms = false;
+}
+
+Field::Field(const TCHAR* Name, Reader* reader, bool store, bool index, bool token, const bool storeTermVector)
+{
+//Func - Constructor taking the legacy boolean flags, for reader-valued fields
+//Pre  - Name != NULL and contains the name of the field
+//       reader != NULL and contains a Reader; the field's destructor deletes it
+//       store indicates if the field must be stored
+//       index indicates if the field must be indexed
+//       token indicates if the field must be tokenized
+//       storeTermVector must be false; true is rejected with CL_ERR_IllegalArgument
+//Post - The instance has been created
+
+ CND_PRECONDITION(Name != NULL, "Name is NULL");
+ CND_PRECONDITION(reader != NULL, "reader is NULL");
+
+ // Validate everything before acquiring resources. The destructor does not
+ // run for an object whose constructor exits with an exception, so failing
+ // after the name is interned would leak the interned reference.
+ if ( storeTermVector )
+  _CLTHROWA(CL_ERR_IllegalArgument,"Stored term vector is deprecated with using this constructor");
+
+ int cfg = 0;
+ if ( store )
+  cfg |= STORE_YES;
+ if ( index && token )
+  cfg |= INDEX_TOKENIZED;
+ else if ( index && !token )
+  cfg |= INDEX_UNTOKENIZED;
+
+ // Rejects invalid flag combinations; it only writes the `config` member.
+ setConfig(cfg);
+
+ _name = CLStringIntern::intern( Name CL_FILELINE);
+ _stringValue = NULL;
+ _readerValue = reader;
+ _streamValue = NULL;
+ boost=1.0f;
+ omitNorms = false;
+}
+
+/**
+* Creates a reader-valued field.
+* @param Name   field name, must not be NULL; it is interned
+* @param reader value source, must not be NULL; the field's destructor deletes it
+* @param config combination of the Store / Index / TermVector flags; invalid
+*               combinations are rejected by setConfig()
+*/
+Field::Field(const TCHAR* Name, Reader* reader, int config)
+{
+ CND_PRECONDITION(Name != NULL, "Name is NULL");
+ CND_PRECONDITION(reader != NULL, "reader is NULL");
+
+ // Validate the flags first: setConfig() only writes the `config` member, and
+ // if it rejects them after the name is interned, that reference would leak
+ // because the destructor does not run for a failed construction.
+ setConfig(config);
+
+ _name = CLStringIntern::intern( Name CL_FILELINE);
+ _stringValue = NULL;
+ _readerValue = reader;
+ _streamValue = NULL;
+ boost=1.0f;
+ omitNorms = false;
+}
+
+
+/**
+* Creates a string-valued field.
+* @param Name   field name, must not be NULL; it is interned
+* @param Value  field value, must not be NULL; a copy is kept
+* @param config combination of the Store / Index / TermVector flags; invalid
+*               combinations are rejected by setConfig()
+*/
+Field::Field(const TCHAR* Name, const TCHAR* Value, int config)
+{
+ CND_PRECONDITION(Name != NULL, "Name is NULL");
+ CND_PRECONDITION(Value != NULL, "value is NULL");
+
+ // Validate the flags first: setConfig() only writes the `config` member, and
+ // if it rejects them after the name is interned and the value is copied,
+ // both would leak because the destructor does not run for a failed
+ // construction.
+ setConfig(config);
+
+ _name = CLStringIntern::intern( Name CL_FILELINE);
+ _stringValue = stringDuplicate( Value );
+ _readerValue = NULL;
+ _streamValue = NULL;
+ boost=1.0f;
+ omitNorms = false;
+}
+
+/**
+* Creates a binary (stream-valued) field.
+* @param Name   field name, must not be NULL; it is interned
+* @param Value  stream holding the data, must not be NULL; the field's
+*               destructor deletes it
+* @param config combination of the Store / Index / TermVector flags; invalid
+*               combinations are rejected by setConfig()
+*/
+Field::Field(const TCHAR* Name, jstreams::StreamBase<char>* Value, int config)
+{
+ CND_PRECONDITION(Name != NULL, "Name is NULL");
+ CND_PRECONDITION(Value != NULL, "value is NULL");
+
+ // Validate the flags first: setConfig() only writes the `config` member, and
+ // if it rejects them after the name is interned, that reference would leak
+ // because the destructor does not run for a failed construction.
+ setConfig(config);
+
+ _name = CLStringIntern::intern( Name CL_FILELINE);
+ _stringValue = NULL;
+ _readerValue = NULL;
+ _streamValue = Value;
+ boost=1.0f;
+ omitNorms = false;
+}
+
+// Destructor: gives back the interned name and releases whichever of the
+// three value holders (string copy, reader, stream) this field carries.
+Field::~Field()
+{
+ CLStringIntern::unintern(_name);
+
+ _CLDELETE_CARRAY(_stringValue);
+ _CLDELETE(_readerValue);
+ _CLVDELETE( _streamValue );
+}
+
+
+/*===============FIELDS=======================*/
+// --- value accessors: each returns the stored pointer itself, not a copy ---
+const TCHAR* Field::name()
+{
+ return _name;
+}
+TCHAR* Field::stringValue()
+{
+ return _stringValue;
+}
+Reader* Field::readerValue()
+{
+ return _readerValue;
+}
+jstreams::StreamBase<char>* Field::streamValue()
+{
+ return _streamValue;
+}
+
+// --- flag queries, all derived from the `config` bit set ---
+bool Field::isStored()
+{
+ return (config & STORE_YES) != 0;
+}
+bool Field::isIndexed()
+{
+ return (config & (INDEX_TOKENIZED | INDEX_UNTOKENIZED)) != 0;
+}
+bool Field::isTokenized()
+{
+ return (config & INDEX_TOKENIZED) != 0;
+}
+bool Field::isCompressed()
+{
+ return (config & STORE_COMPRESS) != 0;
+}
+// A field counts as binary when it carries a stream value.
+bool Field::isBinary()
+{
+ return _streamValue != NULL;
+}
+
+bool Field::isTermVectorStored()
+{
+ return (config & TERMVECTOR_YES) != 0;
+}
+bool Field::isStoreOffsetWithTermVector()
+{
+ return isTermVectorStored() && (config & TERMVECTOR_WITH_OFFSETS) != 0;
+}
+bool Field::isStorePositionWithTermVector()
+{
+ return isTermVectorStored() && (config & TERMVECTOR_WITH_POSITIONS) != 0;
+}
+
+bool Field::getOmitNorms()
+{
+ return omitNorms;
+}
+void Field::setOmitNorms(bool omitNorms)
+{
+ this->omitNorms = omitNorms;
+}
+
+void Field::setBoost(qreal boost)
+{
+ this->boost = boost;
+}
+qreal Field::getBoost()
+{
+ return boost;
+}
+
+void Field::setConfig(int x){ // normalizes the requested flag set x, validates it, and stores the result in `config`
+ int newConfig=0; // built up below; assigned to `config` only if no check throws
+
+ //set storage settings: STORE_COMPRESS implies STORE_YES, otherwise STORE_NO
+ if ( (x & STORE_YES) || (x & STORE_COMPRESS) ){
+ newConfig |= STORE_YES;
+ if ( x & STORE_COMPRESS )
+ newConfig |= STORE_COMPRESS;
+ }else
+ newConfig |= STORE_NO;
+
+ if ( (x & INDEX_NO)==0 ){ // set index settings, unless indexing was explicitly disabled
+ bool index=false; // becomes true as soon as any indexing flag is seen
+
+ if ( x & INDEX_NONORMS ){ // note: only the flag is recorded; the omitNorms member is not touched here
+ newConfig |= INDEX_NONORMS;
+ index = true;
+ }
+
+ if ( x & INDEX_TOKENIZED && x & INDEX_UNTOKENIZED ) // the two modes are mutually exclusive
+ _CLTHROWA(CL_ERR_IllegalArgument,"it doesn't make sense to have an untokenised and tokenised field");
+ if ( x & INDEX_TOKENIZED ){
+ newConfig |= INDEX_TOKENIZED;
+ index = true;
+ }
+ if ( x & INDEX_UNTOKENIZED ){
+ newConfig |= INDEX_UNTOKENIZED;
+ index = true;
+ }
+ if ( !index ) // no indexing flag given at all
+ newConfig |= INDEX_NO;
+ }else
+ newConfig |= INDEX_NO;
+
+ if ( newConfig & INDEX_NO && newConfig & STORE_NO ) // such a field would have no effect
+ _CLTHROWA(CL_ERR_IllegalArgument,"it doesn't make sense to have a field that is neither indexed nor stored");
+
+ //set termvector settings: offsets or positions imply TERMVECTOR_YES
+ if ( (x & TERMVECTOR_NO) == 0 ){
+ bool termVector=false; // becomes true as soon as any term vector flag is seen
+ if ( x & TERMVECTOR_YES ){
+ termVector=true;
+ }
+ if ( x & TERMVECTOR_WITH_OFFSETS ){
+ newConfig |= TERMVECTOR_WITH_OFFSETS;
+ termVector=true;
+ }
+ if ( x & TERMVECTOR_WITH_POSITIONS ){
+ newConfig |= TERMVECTOR_WITH_POSITIONS;
+ termVector=true;
+ }
+ if ( termVector ){
+ if ( newConfig & INDEX_NO ) // term vectors require an indexed field
+ _CLTHROWA(CL_ERR_IllegalArgument,"cannot store a term vector for fields that are not indexed.");
+
+ newConfig |= TERMVECTOR_YES;
+ }else
+ newConfig |= TERMVECTOR_NO;
+ }else
+ newConfig |= TERMVECTOR_NO;
+
+ config = newConfig; // commit; reached only when every check above passed
+}
+
+/**
+* Builds a human-readable description of the field in the form
+* "flag,flag,...<name:value>". For reader and stream fields the value is
+* shown as the placeholder "Reader" or "Stream".
+* @return string obtained from StringBuffer::toString()
+*/
+TCHAR* Field::toString() {
+ CL_NS(util)::StringBuffer result;
+
+ // Flag section: a comma is emitted before a flag only when something
+ // precedes it.
+ if (isStored())
+  result.append( _T("stored") );
+
+ if (isIndexed()) {
+  if (result.length() > 0) {
+   result.append( _T(",") );
+  }
+  result.append( _T("indexed") );
+ }
+
+ if (isTokenized()) {
+  if (result.length() > 0) {
+   result.append( _T(",") );
+  }
+  result.append( _T("tokenized") );
+ }
+
+ if (isTermVectorStored()) {
+  if (result.length() > 0) {
+   result.append( _T(",") );
+  }
+  result.append( _T("termVector") );
+ }
+
+ if (isStoreOffsetWithTermVector()) {
+  if (result.length() > 0) {
+   result.appendChar( ',' );
+  }
+  result.append( _T("termVectorOffsets") );
+ }
+
+ if (isStorePositionWithTermVector()) {
+  if (result.length() > 0) {
+   result.appendChar( ',' );
+  }
+  result.append( _T("termVectorPosition") );
+ }
+
+ if (isBinary()) {
+  if (result.length() > 0) {
+   result.appendChar( ',' );
+  }
+  result.append( _T("binary") );
+ }
+
+ // This flag brings its own leading comma.
+ if (getOmitNorms())
+  result.append( _T(",omitNorms") );
+
+ // Name / value section.
+ result.appendChar('<');
+ result.append(name());
+ result.appendChar(':');
+
+ if (_stringValue != NULL) {
+  result.append(_stringValue);
+ } else if ( _readerValue != NULL ) {
+  result.append( _T("Reader") );
+ } else if ( _streamValue != NULL ) {
+  result.append( _T("Stream") );
+ } else {
+  result.append( _T("NULL") );
+ }
+
+ result.appendChar('>');
+ return result.toString();
+}
+
+
+/** Factory: a field that is stored and indexed as a single untokenized term. */
+Field* Field::Keyword(const TCHAR* Name, const TCHAR* Value) {
+ const int flags = Field::STORE_YES | Field::INDEX_UNTOKENIZED;
+ return _CLNEW Field(Name,Value,flags);
+}
+
+/** Factory: a field that is stored but not indexed. */
+Field* Field::UnIndexed(const TCHAR* Name, const TCHAR* Value) {
+ const int flags = Field::STORE_YES | Field::INDEX_NO;
+ return _CLNEW Field(Name,Value,flags);
+}
+
+/** Factory: a stored, tokenized field; optionally with a term vector. */
+Field* Field::Text(const TCHAR* Name, const TCHAR* Value, const bool storeTermVector) {
+ int flags = Field::STORE_YES | Field::INDEX_TOKENIZED;
+ if ( storeTermVector )
+  flags |= Field::TERMVECTOR_YES;
+ return _CLNEW Field(Name,Value,flags);
+}
+
+/** Factory: a tokenized field that is not stored; optionally with a term vector. */
+Field* Field::UnStored(const TCHAR* Name, const TCHAR* Value, const bool storeTermVector) {
+ int flags = Field::STORE_NO | Field::INDEX_TOKENIZED;
+ if ( storeTermVector )
+  flags |= Field::TERMVECTOR_YES;
+ return _CLNEW Field(Name,Value,flags);
+}
+
+/** Factory: a tokenized field whose text comes from a Reader; optionally with a term vector. No store flag is passed. */
+Field* Field::Text(const TCHAR* Name, Reader* Value, const bool storeTermVector) {
+ int flags = Field::INDEX_TOKENIZED;
+ if ( storeTermVector )
+  flags |= Field::TERMVECTOR_YES;
+ return _CLNEW Field(Name,Value,flags);
+}
+
+CL_NS_END
diff --git a/src/assistant/3rdparty/clucene/src/CLucene/document/Field.h b/src/assistant/3rdparty/clucene/src/CLucene/document/Field.h
new file mode 100644
index 000000000..771a1382b
--- /dev/null
+++ b/src/assistant/3rdparty/clucene/src/CLucene/document/Field.h
@@ -0,0 +1,261 @@
+/*------------------------------------------------------------------------------
+* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
+*
+* Distributable under the terms of either the Apache License (Version 2.0) or
+* the GNU Lesser General Public License, as specified in the COPYING file.
+------------------------------------------------------------------------------*/
+#ifndef _lucene_document_Field_
+#define _lucene_document_Field_
+
+#if defined(_LUCENE_PRAGMA_ONCE)
+# pragma once
+#endif
+
+#include "CLucene/util/Reader.h"
+#include "CLucene/util/streambase.h"
+
+CL_NS_DEF(document)
+/**
+A field is a section of a Document. Each field has two parts, a name and a
+value. Values may be free text, provided as a String or as a Reader, or they
+may be atomic keywords, which are not further processed. Such keywords may
+be used to represent dates, urls, etc. Fields are optionally stored in the
+index, so that they may be returned with hits on the document.
+
+PORTING: CLucene doesn't directly support compressed fields. However, it is easy
+to reproduce this functionality by using the GZip streams in the contrib package.
+Also note that binary fields are not read immediately in CLucene; a substream
+is pointed directly to the field's data, in effect creating a lazy load ability.
+This means that large fields are best saved in binary format (even if they are
+text), so that they can be loaded lazily.
+*/
+class Field :LUCENE_BASE{
+private:
+ const TCHAR* _name;
+ TCHAR* _stringValue;
+ CL_NS(util)::Reader* _readerValue;
+ jstreams::StreamBase<char>* _streamValue;
+
+ int config; // presumably the OR-ed Store/Index/TermVector flags passed as `configs` - TODO confirm
+ qreal boost;
+ bool omitNorms;
+public:
+ enum Store{
+ /** Store the original field value in the index. This is useful for short texts
+ * like a document's title which should be displayed with the results. The
+ * value is stored in its original form, i.e. no analyzer is used before it is
+ * stored.
+ */
+ STORE_YES=1,
+ /** Do not store the field value in the index. */
+ STORE_NO=2,
+
+ /** Store the original field value in the index in a compressed form. This is
+ * useful for long documents and for binary valued fields.
+ * NOTE: CLucene does not directly support compressed fields; per the PORTING
+ * note on this class, the GZip streams in the contrib package can be used instead.
+ * //TODO: need better documentation on how to add a compressed field
+ * //because actually we still need to write a GZipOutputStream...
+ */
+ STORE_COMPRESS=4
+ };
+
+ enum Index{
+ /** Do not index the field value. This field can thus not be searched,
+ * but one can still access its contents provided it is
+ * {@link Field::Store stored}. */
+ INDEX_NO=16,
+ /** Index the field's value so it can be searched. An Analyzer will be used
+ * to tokenize and possibly further normalize the text before its
+ * terms will be stored in the index. This is useful for common text.
+ */
+ INDEX_TOKENIZED=32,
+ /** Index the field's value without using an Analyzer, so it can be searched.
+ * As no analyzer is used the value will be stored as a single term. This is
+ * useful for unique Ids like product numbers.
+ */
+ INDEX_UNTOKENIZED=64,
+ /** Index the field's value without an Analyzer, and disable
+ * the storing of norms. No norms means that index-time boosting
+ * and field length normalization will be disabled. The benefit is
+ * less memory usage as norms take up one byte per indexed field
+ * for every document in the index.
+ */
+ INDEX_NONORMS=128
+ };
+
+ enum TermVector{
+ /** Do not store term vectors. */
+ TERMVECTOR_NO=256,
+ /** Store the term vectors of each document. A term vector is a list
+ * of the document's terms and their number of occurrences in that document. */
+ TERMVECTOR_YES=512,
+ /**
+ * Store the term vector + token position information
+ *
+ * @see TERMVECTOR_YES
+ */
+ TERMVECTOR_WITH_POSITIONS=1024,
+ /**
+ * Store the term vector + Token offset information
+ *
+ * @see TERMVECTOR_YES
+ */
+ TERMVECTOR_WITH_OFFSETS=2048
+ };
+
+ _CL_DEPRECATED( another overload ) Field(const TCHAR* name, const TCHAR* value, bool store, bool index, bool token, const bool storeTermVector=false);
+ _CL_DEPRECATED( another overload ) Field(const TCHAR* name, CL_NS(util)::Reader* reader, bool store, bool index, bool token, const bool storeTermVector=false);
+
+ Field(const TCHAR* name, const TCHAR* value, int configs);
+ Field(const TCHAR* name, CL_NS(util)::Reader* reader, int configs);
+ Field(const TCHAR* name, jstreams::StreamBase<char>* stream, int configs);
+ ~Field();
+
+ /** Constructs a String-valued Field that is not tokenized, but is indexed
+ * and stored. Useful for non-text fields, e.g. date or url.
+ * @deprecated Use new Field(name,value,Field::STORE_YES | Field::INDEX_UNTOKENIZED)
+ */
+ _CL_DEPRECATED( new Field(*) ) static Field* Keyword(const TCHAR* name, const TCHAR* value);
+
+ /** Constructs a String-valued Field that is not tokenized nor indexed,
+ * but is stored in the index, for return with hits.
+ * @deprecated Use new Field(name,value,Field::STORE_YES | Field::INDEX_NO)
+ */
+ _CL_DEPRECATED( new Field(*) ) static Field* UnIndexed(const TCHAR* name, const TCHAR* value);
+
+ /** Constructs a String-valued Field that is tokenized and indexed,
+ * and is stored in the index, for return with hits. Useful for short text
+ * fields, like "title" or "subject".
+ * @deprecated Use new Field(name,value,Field::STORE_YES | Field::INDEX_TOKENIZED)
+ */
+ _CL_DEPRECATED( new Field(*) ) static Field* Text(const TCHAR* name, const TCHAR* value, const bool storeTermVector=false);
+
+ /** Constructs a String-valued Field that is tokenized and indexed,
+ * but that is not stored in the index.
+ * @deprecated Use new Field(name,value,Field::STORE_NO | Field::INDEX_TOKENIZED)
+ */
+ _CL_DEPRECATED( new Field(*) ) static Field* UnStored(const TCHAR* name, const TCHAR* value, const bool storeTermVector=false);
+
+ /** Constructs a Reader-valued Field that is tokenized and indexed, but is
+ * *not* stored in the index verbatim. Useful for longer text fields, like
+ * "body".
+ * @deprecated Use new Field(name,value, Field::INDEX_TOKENIZED)
+ */
+ _CL_DEPRECATED( new Field(*) ) static Field* Text(const TCHAR* name, CL_NS(util)::Reader* value, const bool storeTermVector=false);
+
+ /** The name of the field (e.g., "date", "subject", "title", "body", etc.)
+ * as an interned string. */
+ const TCHAR* name(); ///<returns reference
+
+ /** The value of the field as a String, or null. If null, the Reader value
+ * or stream (binary) value is used. Exactly one of stringValue(), readerValue() and
+ * streamValue() must be set. */
+ TCHAR* stringValue(); ///<returns reference
+
+ /** The value of the field as a reader, or null. If null, the String value
+ * or stream value is used. Exactly one of stringValue(), readerValue() and
+ * streamValue() must be set. */
+ CL_NS(util)::Reader* readerValue();
+
+ /** The value of the field as a stream, or null. If null, the String value
+ * or Reader value is used. Exactly one of stringValue(), readerValue() and
+ * streamValue() must be set. */
+ jstreams::StreamBase<char>* streamValue();
+
+ // True iff the value of the field is to be stored in the index for return
+ // with search hits. It is an error for this to be true if a field is
+ // Reader-valued.
+ bool isStored();
+
+ // True iff the value of the field is to be indexed, so that it may be
+ // searched on.
+ bool isIndexed();
+
+ // True iff the value of the field should be tokenized as text prior to
+ // indexing. Un-tokenized fields are indexed as a single word and may not be
+ // Reader-valued.
+ bool isTokenized();
+
+ /** True if the value of the field is stored and compressed within the index.
+ * NOTE: CLucene does not actually support compressed fields. Instead, a reader
+ * will be returned with a pointer to a SubIndexInputStream. A GZipInputStream
+ * and a UTF8 reader must be used to actually read the content. This flag
+ * will only be set if the index was created by another lucene implementation.
+ */
+ bool isCompressed();
+
+ //Set configs by OR-ing flag values together (the example below uses '|', not XOR). This resets all the settings
+ //For example, to use term vectors with positions and offsets do:
+ //object->setConfig(TERMVECTOR_WITH_POSITIONS | TERMVECTOR_WITH_OFFSETS);
+ void setConfig(int termVector);
+
+ /** True iff the term or terms used to index this field are stored as a term
+ * vector, available from {@link IndexReader#getTermFreqVector(int32_t,TCHAR*)}.
+ * These methods do not provide access to the original content of the field,
+ * only to terms used to index it. If the original content must be
+ * preserved, use the <code>stored</code> attribute instead.
+ *
+ * @see IndexReader#getTermFreqVector(int32_t, String)
+ */
+ bool isTermVectorStored();
+
+ /**
+ * True iff terms are stored as term vector together with their offsets
+ * (start and end position in source text).
+ */
+ bool isStoreOffsetWithTermVector();
+
+ /**
+ * True iff terms are stored as term vector together with their token positions.
+ */
+ bool isStorePositionWithTermVector();
+
+ /** Returns the boost factor for hits for this field.
+ *
+ * <p>The default value is 1.0.
+ *
+ * <p>Note: this value is not stored directly with the document in the index.
+ * Documents returned from {@link IndexReader#document(int)} and
+ * {@link Hits#doc(int)} may thus not have the same value present as when
+ * this field was indexed.
+ *
+ * @see #setBoost(qreal)
+ */
+ qreal getBoost();
+
+ /** Sets the boost factor for hits on this field. This value will be
+ * multiplied into the score of all hits on this field of this document.
+ *
+ * <p>The boost is multiplied by {@link Document#getBoost()} of the document
+ * containing this field. If a document has multiple fields with the same
+ * name, all such values are multiplied together. This product is then
+ * multiplied by the value {@link Similarity#lengthNorm(String,int)}, and
+ * rounded by {@link Similarity#encodeNorm(float)} before it is stored in the
+ * index. One should attempt to ensure that this product does not overflow
+ * the range of that encoding.
+ *
+ * @see Document#setBoost(float)
+ * @see Similarity#lengthNorm(String, int)
+ * @see Similarity#encodeNorm(float)
+ */
+ void setBoost(qreal value);
+
+ /** True iff the value of the field is stored as binary */
+ bool isBinary();
+
+ /** True if norms are omitted for this indexed field */
+ bool getOmitNorms();
+
+ /** Expert:
+ *
+ * If set, omit normalization factors associated with this indexed field.
+ * This effectively disables indexing boosts and length normalization for this field.
+ */
+ void setOmitNorms(bool omitNorms);
+
+ // Prints a Field for human consumption.
+ TCHAR* toString();
+};
+CL_NS_END
+#endif