/*
 * Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
 *
 * Distributable under the terms of either the Apache License (Version 2.0) or
 * the GNU Lesser General Public License, as specified in the COPYING file.
 *
 * Changes are Copyright (C) 2012 Digia Plc and/or its subsidiary(-ies), all rights reserved.
 */
#include "CLucene/StdHeader.h"
#include "FieldsReader.h"

#include "CLucene/util/VoidMap.h"
#include "CLucene/util/Misc.h"
#include "CLucene/store/Directory.h"
#include "CLucene/document/Document.h"
#include "CLucene/document/Field.h"
#include "FieldInfos.h"
#include "FieldsWriter.h"

CL_NS_USE(store)
CL_NS_USE(document)
CL_NS_USE(util)
CL_NS_DEF(index)

//Func - Advances 'in' past 'length' bytes of field data, starting at its
//       current file pointer.
//Pre  - in != NULL
//Post - When the data ends exactly at the end of the stream, the stream has
//       been positioned on the last byte and that byte has been read;
//       otherwise the stream has been sought to just past the data.
static void skipFieldData(IndexInput* in, int32_t length)
{
    const int64_t target = in->getFilePointer() + length;
    if (target == in->length()) {
        // set to eof
        // NOTE(review): presumably a direct seek to the end of the stream is
        // not reliable for every IndexInput - confirm.
        in->seek(target - 1);
        in->readByte();
    } else {
        in->seek(target);
    }
}

FieldsReader::FieldsReader(Directory* d, const QString& segment, FieldInfos* fn)
    : fieldInfos(fn)
{
    //Func - Constructor
    //Pre  - d contains a valid reference to a Directory
    //       segment != NULL
    //       fn contains a valid reference to a FieldInfos
    //Post - The instance has been created: the ".fdt" stream is open as
    //       fieldsStream, the ".fdx" stream is open as indexStream and _size
    //       holds the number of 8 byte entries in indexStream

    // Start from a known state so that close() can be used for clean-up.
    fieldsStream = NULL;
    indexStream = NULL;

    CND_PRECONDITION(!segment.isEmpty(), "segment != NULL");

    try {
        QString buf = Misc::segmentname(segment, QLatin1String(".fdt"));
        fieldsStream = d->openInput(buf);

        buf = Misc::segmentname(segment, QLatin1String(".fdx"));
        indexStream = d->openInput(buf);

        // Divide before narrowing, so the 64 bit length is not truncated
        // ahead of the division.
        _size = (int32_t)(indexStream->length() / 8);
    } catch (...) {
        // The destructor does not run for a partially constructed object, so
        // release whatever was opened before passing the error on.
        close();
        throw;
    }
}

FieldsReader::~FieldsReader()
{
    //Func - Destructor
    //Pre  - true
    //Post - The instance has been destroyed

    close();
}

void FieldsReader::close()
{
    //Func - Closes the FieldsReader
    //Pre  - true
    //Post - The FieldsReader has been closed; both streams have been closed
    //       and deleted if they were open

    if (fieldsStream) {
        fieldsStream->close();
        _CLDELETE(fieldsStream);
    }

    if (indexStream) {
        indexStream->close();
        _CLDELETE(indexStream);
    }
}

int32_t FieldsReader::size() const
{
    //Func - Returns the number of entries computed from the index stream
    //       length at construction time
    //Pre  - true
    //Post - _size has been returned

    return _size;
}

bool FieldsReader::doc(int32_t n, Document* doc)
{
    //Func - Reads the stored fields of document number n and adds them to doc
    //Pre  - doc != NULL
    //Post - Returns false, leaving doc untouched, when n does not address a
    //       complete 8 byte entry of the index stream. Otherwise every stored
    //       field of the document has been added to doc and true is returned.
    //       Throws CL_ERR_IO when a field number is unknown to fieldInfos.

    // Each document owns one 8 byte entry in the index stream. The offset is
    // computed in 64 bits and the whole entry has to fit inside the stream.
    const int64_t indexOffset = static_cast<int64_t>(n) * 8;
    if (n < 0 || indexOffset + 8 > indexStream->length())
        return false;

    indexStream->seek(indexOffset);
    const int64_t position = indexStream->readLong();
    fieldsStream->seek(position);

    const int32_t numFields = fieldsStream->readVInt();
    for (int32_t i = 0; i < numFields; i++) {
        const int32_t fieldNumber = fieldsStream->readVInt();
        FieldInfo* fi = fieldInfos->fieldInfo(fieldNumber);
        if (fi == NULL)
            _CLTHROWA(CL_ERR_IO, "Field stream is invalid");

        // Flags stored with the field (FieldsWriter::FIELD_IS_*). They are
        // kept apart from the Field configuration that is built below.
        const uint8_t bits = fieldsStream->readByte();

        if ((bits & FieldsWriter::FIELD_IS_BINARY) != 0) {
            const int32_t fieldLen = fieldsStream->readVInt();

            // The holder has to be created before skipping: it clones
            // fieldsStream at its current position.
            FieldsStreamHolder* subStream =
                new FieldsStreamHolder(fieldsStream, fieldLen);

            const uint8_t config = Field::STORE_YES;
            Field* f = _CLNEW Field(
                fi->name,   // name
                subStream,  // read value
                config);
            doc->add(*f);

            // now skip over the rest of the field
            skipFieldData(fieldsStream, fieldLen);
        } else {
            // NOTE(review): the configuration is accumulated in a uint8_t, as
            // before; any Field flag that does not fit in 8 bits would be
            // dropped here - confirm against the declarations in Field.h.
            uint8_t config = Field::STORE_YES;

            if (!fi->isIndexed)
                config |= Field::INDEX_NO;
            else if ((bits & FieldsWriter::FIELD_IS_TOKENIZED) != 0)
                config |= Field::INDEX_TOKENIZED;
            else
                config |= Field::INDEX_UNTOKENIZED;

            if (!fi->storeTermVector) {
                config |= Field::TERMVECTOR_NO;
            } else if (!fi->storeOffsetWithTermVector
                && !fi->storePositionWithTermVector) {
                config |= Field::TERMVECTOR_YES;
            } else {
                if (fi->storeOffsetWithTermVector)
                    config |= Field::TERMVECTOR_WITH_OFFSETS;
                if (fi->storePositionWithTermVector)
                    config |= Field::TERMVECTOR_WITH_POSITIONS;
            }

            if ((bits & FieldsWriter::FIELD_IS_COMPRESSED) != 0) {
                config |= Field::STORE_COMPRESS;
                const int32_t fieldLen = fieldsStream->readVInt();
                FieldsStreamHolder* subStream =
                    new FieldsStreamHolder(fieldsStream, fieldLen);

                // TODO: we dont have gzip inputstream available, must alert
                // user to somehow use a gzip inputstream
                Field* f = _CLNEW Field(
                    fi->name,   // name
                    subStream,  // read value
                    config);
                f->setOmitNorms(fi->omitNorms);
                doc->add(*f);

                // now skip over the rest of the field
                skipFieldData(fieldsStream, fieldLen);
            } else {
                TCHAR* fvalue = fieldsStream->readString(true);
                Field* f = _CLNEW Field(
                    fi->name,   // name
                    fvalue,     // read value
                    config);
                // TODO: could optimise this
                _CLDELETE_CARRAY(fvalue);
                f->setOmitNorms(fi->omitNorms);
                doc->add(*f);
            }
        }
    }
    return true;
}

FieldsReader::FieldsStreamHolder::FieldsStreamHolder(IndexInput* indexInput,
    int32_t subLength)
{
    //Func - Constructor
    //Pre  - indexInput != NULL
    //Post - The holder owns a clone of indexInput, wrapped in an
    //       IndexInputStream that is limited to subLength by a
    //       jstreams::SubInputStream. size, position, error and status
    //       mirror the state of that sub stream.

    this->indexInput = indexInput->clone();
    this->indexInputStream = new IndexInputStream(this->indexInput);
    this->subStream = new jstreams::SubInputStream(indexInputStream, subLength);

    this->size = subStream->getSize();
    this->position = subStream->getPosition();
    this->error = subStream->getError();
    this->status = subStream->getStatus();
}

FieldsReader::FieldsStreamHolder::~FieldsStreamHolder()
{
    //Func - Destructor
    //Pre  - true
    //Post - The sub stream, the IndexInputStream and the cloned IndexInput
    //       have been destroyed, in that order

    delete subStream;
    delete indexInputStream;

    indexInput->close();
    _CLDELETE(indexInput);
}

int32_t FieldsReader::FieldsStreamHolder::read(const char*& start, int32_t _min,
    int32_t _max)
{
    //Func - Forwards the read to the sub stream
    //Post - position, error and status mirror the sub stream again and the
    //       result of the sub stream's read has been returned

    int32_t ret = subStream->read(start, _min, _max);
    this->position = subStream->getPosition();
    this->error = subStream->getError();
    this->status = subStream->getStatus();
    return ret;
}

int64_t FieldsReader::FieldsStreamHolder::skip(int64_t ntoskip)
{
    //Func - Forwards the skip to the sub stream
    //Post - position, error and status mirror the sub stream again and the
    //       result of the sub stream's skip has been returned

    int64_t ret = subStream->skip(ntoskip);
    this->position = subStream->getPosition();
    this->error = subStream->getError();
    this->status = subStream->getStatus();
    return ret;
}

int64_t FieldsReader::FieldsStreamHolder::reset(int64_t pos)
{
    //Func - Forwards the reset to the sub stream
    //Post - position, error and status mirror the sub stream again and the
    //       result of the sub stream's reset has been returned

    int64_t ret = subStream->reset(pos);
    this->position = subStream->getPosition();
    this->error = subStream->getError();
    this->status = subStream->getStatus();
    return ret;
}

CL_NS_END