diff options
author | Alexander Barkov <bar@mnogosearch.org> | 2013-02-07 13:34:27 +0400 |
---|---|---|
committer | Alexander Barkov <bar@mnogosearch.org> | 2013-02-07 13:34:27 +0400 |
commit | 30c4b0ebc24fe0106e146b1f6577a4150e71e258 (patch) | |
tree | 592673d8c2f5b418833c3ee5fcfeb9dbf4ba1681 /storage/connect/tabfmt.cpp | |
parent | 60c4cab3bd00621cc03afb1be6de01c8fab0c5f0 (diff) | |
download | mariadb-git-30c4b0ebc24fe0106e146b1f6577a4150e71e258.tar.gz |
- Fixing TAB to 2 spaces
- Fixing line endings from "\r\n" to "\n"
Diffstat (limited to 'storage/connect/tabfmt.cpp')
-rw-r--r-- | storage/connect/tabfmt.cpp | 2782 |
1 files changed, 1391 insertions, 1391 deletions
diff --git a/storage/connect/tabfmt.cpp b/storage/connect/tabfmt.cpp index f2efa775d5e..c62337b26d1 100644 --- a/storage/connect/tabfmt.cpp +++ b/storage/connect/tabfmt.cpp @@ -1,1391 +1,1391 @@ -/************* TabFmt C++ Program Source Code File (.CPP) **************/
-/* PROGRAM NAME: TABFMT */
-/* ------------- */
-/* Version 3.7 */
-/* */
-/* COPYRIGHT: */
-/* ---------- */
-/* (C) Copyright to the author Olivier BERTRAND 2001 - 2013 */
-/* */
-/* WHAT THIS PROGRAM DOES: */
-/* ----------------------- */
-/* This program contains the DB execution routines of the TABFMT */
-/* classes. */
-/* The base class CSV is comma separated files. */
-/* FMT (Formatted) files are those having a complex internal record */
-/* format described in the Format keyword of their definition. */
-/***********************************************************************/
-
-/***********************************************************************/
-/* Include relevant MariaDB header file. */
-/***********************************************************************/
-#include "my_global.h"
-
-#if defined(WIN32)
-#include <io.h>
-#include <fcntl.h>
-#include <errno.h>
-#include <locale.h>
-#if defined(__BORLANDC__)
-#define __MFC_COMPAT__ // To define min/max as macro
-#endif
-//#include <windows.h>
-#include "osutil.h"
-#else
-#if defined(UNIX)
-#include <errno.h>
-#include <unistd.h>
-#include "osutil.h"
-#else
-#include <io.h>
-#endif
-#include <fcntl.h>
-#endif
-
-/***********************************************************************/
-/* Include application header files: */
-/* global.h is header containing all global declarations. */
-/* plgdbsem.h is header containing the DB application declarations. */
-/* tabdos.h is header containing the TABDOS class declarations. */
-/***********************************************************************/
-#include "global.h"
-#include "plgdbsem.h"
-#include "filamap.h"
-#if defined(ZIP_SUPPORT)
-#include "filamzip.h"
-#endif // ZIP_SUPPORT
-#include "tabfmt.h"
-#include "tabmul.h"
-#define NO_FUNC
-#include "plgcnx.h" // For DB types
-#include "resource.h"
-
-/***********************************************************************/
-/* This should be an option. */
-/***********************************************************************/
-#define MAXCOL 200 /* Default max column nb in result */
-#define TYPE_UNKNOWN 10 /* Must be greater than other types */
-
-extern "C" int trace;
-
-/***********************************************************************/
-/* CSV Catalog utility functions. */
-/***********************************************************************/
-PQRYRES PlgAllocResult(PGLOBAL, int, int, int, int *, int *,
- unsigned int *, bool blank = true, bool nonull = false);
-
-/***********************************************************************/
-/* CSVColumns: constructs the result blocks containing the description */
-/* of all the columns of a CSV file that will be retrieved by #GetData.*/
-/* Note: the algorithm to set the type is based on the internal values */
-/* of types (TYPE_STRING < TYPE_FLOAT < TYPE_INT) (1 < 2 < 7). */
-/* If these values are changed, this will have to be revisited.
-   Parameters:
-     g    global block; g->Message receives the error text.
-     fn   file name (NULL is rejected with MISSING_FNAME).
-     sep  field separator character.
-     q    quoting character (0 means that quotes are not recognized
-          in the header line).
-     hdr  when not 0, the first line is used to make the column names.
-     mxr  maximum number of erroneous lines that are skipped before
-          the analysis fails (negative values are handled as 0).
-   Returns the result descriptor, one row per column found, or NULL
-   on error. */
-/***********************************************************************/
-PQRYRES CSVColumns(PGLOBAL g, char *fn, char sep, char q, int hdr, int mxr)
- {
- static int dbtype[] = {DB_CHAR, DB_SHORT, DB_CHAR, // One entry per result column,
- DB_INT, DB_INT, DB_SHORT}; // filled in the last loop of this function
- static int buftyp[] = {TYPE_STRING, TYPE_SHORT, TYPE_STRING,
- TYPE_INT, TYPE_INT, TYPE_SHORT};
- static unsigned int length[] = {6, 6, 8, 10, 10, 6};
- char *p, *colname[MAXCOL], dechar, filename[_MAX_PATH], buf[4096];
- int i, imax, hmax, n, nerr, phase, blank, digit, dec, type;
- int ncol = sizeof(dbtype) / sizeof(int); // Number of result columns
- int num_read = 0, num_max = 10000000; // Statistics
- int len[MAXCOL], typ[MAXCOL], prc[MAXCOL]; // Per column max length, type and decimals
- FILE *infile;
- PQRYRES qrp;
- PCOLRES crp;
-
-// num_max = atoi(p+1); // Max num of record to test
-#if defined(WIN32)
- if (strnicmp(setlocale(LC_NUMERIC, NULL), "French", 6))
- dechar = '.';
- else
- dechar = ','; // Numeric locale name begins with "French"
-#else // !WIN32
- dechar = '.';
-#endif // !WIN32
-
- if (trace)
- htrc("File %s sep=%c q=%c hdr=%d mxr=%d\n",
- SVP(fn), sep, q, hdr, mxr);
-
- if (!fn) {
- strcpy(g->Message, MSG(MISSING_FNAME));
- return NULL;
- } // endif fn
-
- imax = hmax = nerr = 0;
- mxr = max(0, mxr); // A negative maximum means no error accepted
-
- for (i = 0; i < MAXCOL; i++) {
- colname[i] = NULL;
- len[i] = 0;
- typ[i] = TYPE_UNKNOWN;
- prc[i] = 0;
- } // endfor i
-
- /*********************************************************************/
- /* Open the input file. */
- /*********************************************************************/
- PlugSetPath(filename, fn, PlgGetDataPath(g));
-
- if (!(infile= global_fopen(g, MSGID_CANNOT_OPEN, filename, "r")))
- return NULL;
-
- if (hdr) {
- /*******************************************************************/
- /* Make the column names from the first line.
-    phase is 1 while scanning inside a quoted name, else 0. */
- /*******************************************************************/
- phase = 0;
-
- if (fgets(buf, sizeof(buf), infile)) {
- n = strlen(buf) + 1;
- buf[n - 2] = '\0'; // NOTE(review): drops the last char even when it is not '\n'
-#if defined(UNIX)
- // The file can be imported from Windows
- if (buf[n - 3] == '\r') // NOTE(review): reads buf[-1] when the line is only "\n"
- buf[n - 3] = 0;
-#endif // UNIX
- p = (char*)PlugSubAlloc(g, NULL, n);
- memcpy(p, buf, n); // colname[] entries point into this copy
-
- //skip leading blanks
- for (; *p == ' '; p++) ;
-
- if (q && *p == q) {
- // Header is quoted
- p++;
- phase = 1;
- } // endif q
-
- colname[0] = p;
- } else {
- sprintf(g->Message, MSG(FILE_IS_EMPTY), fn);
- goto err;
- } // endif's
-
- for (i = 1; *p; p++)
- if (phase == 1 && *p == q) {
- *p = '\0'; // Closing quote ends the name
- phase = 0;
- } else if (*p == sep && !phase) {
- *p = '\0'; // Separator ends the name
-
- //skip leading blanks
- for (; *(p+1) == ' '; p++) ;
-
- if (q && *(p+1) == q) {
- // Header is quoted
- p++;
- phase = 1;
- } // endif q
-
- colname[i++] = p + 1; // NOTE(review): i is not checked against MAXCOL here
- } // endif sep
-
- num_read++;
- imax = hmax = i;
-
- for (i = 0; i < hmax; i++)
- length[0] = max(length[0], strlen(colname[i])); // NOTE(review): length is static, the change persists across calls
-
- } // endif hdr
-
- for (num_read++; num_read <= num_max; num_read++) {
- /*******************************************************************/
- /* Now start the reading process. Read one line. */
- /*******************************************************************/
- if (fgets(buf, sizeof(buf), infile)) {
- n = strlen(buf);
- buf[n - 1] = '\0'; // NOTE(review): assumes the line ends with '\n'
-#if defined(UNIX)
- // The file can be imported from Windows
- if (buf[n - 2] == '\r') // NOTE(review): reads buf[-1] when the line is only "\n"
- buf[n - 2] = 0;
-#endif // UNIX
- } else if (feof(infile)) {
- sprintf(g->Message, MSG(EOF_AFTER_LINE), num_read -1);
- break; // End of file stops the scan, this is not an error return
- } else {
- sprintf(g->Message, MSG(ERR_READING_REC), num_read, fn);
- goto err;
- } // endif's
-
- /*******************************************************************/
- /* Make the test for field lengths.
-    State used while scanning the line:
-      i      index of the current field.
-      n      length counted so far for the current field.
-      phase  0 = not in quotes, 1 = inside quotes, 2 = after the
-             closing quote.
-      blank  set once an ordinary (non blank, non quote) character
-             was met in the field.
-      digit  set when the field cannot be a number.
-      dec    0 when no decimal point was met, else 1 + the number
-             of digits met after it. */
- /*******************************************************************/
- i = n = phase = blank = digit = dec = 0;
-
- for (p = buf; *p; p++)
- if (*p == sep) {
- if (phase != 1) {
- if (i == MAXCOL - 1) {
- sprintf(g->Message, MSG(TOO_MANY_FIELDS), num_read, fn);
- goto err;
- } // endif i
-
- if (n) {
- len[i] = max(len[i], n);
- type = (digit || (dec && n == 1)) ? TYPE_STRING
- : (dec) ? TYPE_FLOAT : TYPE_INT;
- typ[i] = min(type, typ[i]); // Keeps the smallest type value met, see the note in the header
- prc[i] = max((typ[i] == TYPE_FLOAT) ? (dec - 1) : 0, prc[i]);
- } // endif n
-
- i++;
- n = phase = blank = digit = dec = 0;
- } else // phase == 1
- n++; // Separator inside quotes belongs to the field
-
- } else if (*p == ' ') {
- if (phase < 2)
- n++;
-
- if (blank)
- digit = 1; // Blank after an ordinary character: not a number
-
- } else if (*p == q) {
- if (phase == 0) {
- if (blank)
- if (++nerr > mxr) {
- sprintf(g->Message, MSG(MISPLACED_QUOTE), num_read);
- goto err;
- } else
- goto skip;
-
- n = 0;
- phase = digit = 1; // Quoted fields are typed as strings
- } else if (phase == 1) {
- if (*(p+1) == q) {
- // This is currently not implemented for CSV tables
-// if (++nerr > mxr) {
-// sprintf(g->Message, MSG(QUOTE_IN_QUOTE), num_read);
-// goto err;
-// } else
-// goto skip;
-
- p++; // Doubled quote counts for one character
- n++;
- } else
- phase = 2;
-
- } else if (++nerr > mxr) { // phase == 2
- sprintf(g->Message, MSG(MISPLACED_QUOTE), num_read);
- goto err;
- } else
- goto skip;
-
- } else {
- if (phase == 2)
- if (++nerr > mxr) {
- sprintf(g->Message, MSG(MISPLACED_QUOTE), num_read);
- goto err;
- } else
- goto skip;
-
- // isdigit cannot be used here because of debug assert
- if (!strchr("0123456789", *p)) {
- if (!digit && *p == dechar)
- dec = 1; // Decimal point found (NOTE(review): a second one restarts the count)
- else if (blank || !(*p == '-' || *p == '+'))
- digit = 1; // Only a leading sign is accepted in a number
-
- } else if (dec)
- dec++; // More decimals
-
- n++;
- blank = 1;
- } // endif's *p
-
- if (phase == 1)
- if (++nerr > mxr) {
- sprintf(g->Message, MSG(UNBALANCE_QUOTE), num_read);
- goto err;
- } else
- goto skip;
-
- if (n) { // Last field of the line
- len[i] = max(len[i], n);
- type = (digit || n == 0 || (dec && n == 1)) ? TYPE_STRING // NOTE(review): n == 0 cannot be true here
- : (dec) ? TYPE_FLOAT : TYPE_INT;
- typ[i] = min(type, typ[i]);
- prc[i] = max((typ[i] == TYPE_FLOAT) ? (dec - 1) : 0, prc[i]);
- } // endif n
-
- imax = max(imax, i+1);
- skip: ; // Skip erroneous line
- } // endfor num_read
-
- if (trace) {
- htrc("imax=%d Lengths:", imax);
-
- for (i = 0; i < imax; i++)
- htrc(" %d", len[i]);
-
- htrc("\n");
- } // endif trace
-
- fclose(infile);
-
- if (trace)
- htrc("CSVColumns: imax=%d hmax=%d len=%d\n",
- imax, hmax, length[0]);
-
- /*********************************************************************/
- /* Allocate the structures used to refer to the result set. */
- /*********************************************************************/
- qrp = PlgAllocResult(g, ncol, imax, IDS_COLUMNS + 3,
- dbtype, buftyp, length);
- qrp->Nblin = imax; // NOTE(review): qrp is used unchecked - confirm PlgAllocResult cannot return NULL
-
- /*********************************************************************/
- /* Now get the results into blocks. */
- /*********************************************************************/
- for (i = 0; i < imax; i++) {
- if (i >= hmax) {
- sprintf(buf, "COL%.3d", i+1); // No header name for this column
- p = buf;
- } else
- p = colname[i];
-
- if (typ[i] == TYPE_UNKNOWN) // Void column
- typ[i] = TYPE_STRING;
-
- crp = qrp->Colresp; // Column Name
- crp->Kdata->SetValue(p, i);
- crp = crp->Next; // Data Type
- crp->Kdata->SetValue(typ[i], i);
- crp = crp->Next; // Type Name
- crp->Kdata->SetValue(GetTypeName(typ[i]), i);
- crp = crp->Next; // Precision
- crp->Kdata->SetValue(len[i], i);
- crp = crp->Next; // Length
- crp->Kdata->SetValue(len[i], i);
- crp = crp->Next; // Scale (precision)
- crp->Kdata->SetValue(prc[i], i);
- } // endfor i
-
- /*********************************************************************/
- /* Return the result pointer for use by GetData routines. */
- /*********************************************************************/
- return qrp;
-
- err:
- fclose(infile);
- return NULL;
- } // end of CSVColumns
-
-/* --------------------------- Class CSVDEF -------------------------- */
-
-/***********************************************************************/
-/* CSVDEF constructor: sets the default values of the CSV options */
-/* (comma separator, no quoting character, no header, no error */
-/* accepted). */
-/***********************************************************************/
-CSVDEF::CSVDEF(void)
- {
- Fmtd = Accept = Header = false;
- Maxerr = 0;
- Quoted = -1; // -1 means not specified (see DefineAM)
- Sep = ',';
- Qot = '\0';
- } // end of CSVDEF constructor
-
-/***********************************************************************/
-/* DefineAM: define specific AM block values from XDB file.
-   Reads the Separator, Quoted, Qchar, Header, Maxerr and Accept
-   options of the table from the catalog.
-   Returns true on error (message in g->Message), false otherwise. */
-/***********************************************************************/
-bool CSVDEF::DefineAM(PGLOBAL g, LPCSTR am, int poff)
- {
- char buf[8];
-
- // Double check correctness of offset values
- for (PCOLDEF cdp = To_Cols; cdp; cdp = cdp->GetNext())
- if (cdp->GetOffset() < 1) {
- strcpy(g->Message, MSG(BAD_OFFSET_VAL));
- return true;
- } // endif Offset
-
- // Call DOSDEF DefineAM with am=CSV so FMT is not confused with FIX
- if (DOSDEF::DefineAM(g, "CSV", poff))
- return true;
-
- Cat->GetCharCatInfo(Name, "Separator", ",", buf, sizeof(buf));
- Sep = (strlen(buf) == 2 && buf[0] == '\\' && buf[1] == 't') ? '\t' : *buf; // The two characters \t stand for a tab
- Quoted = Cat->GetIntCatInfo(Name, "Quoted", -1);
- Cat->GetCharCatInfo(Name, "Qchar", "", buf, sizeof(buf));
- Qot = *buf;
-
- if (Qot && Quoted < 0)
- Quoted = 0; // Qchar given without Quoted
- else if (!Qot && Quoted >= 0)
- Qot = '"'; // Quoted given without Qchar: use the double quote
-
- Fmtd = (!Sep || (am && (*am == 'F' || *am == 'f'))); // Empty separator or am beginning with F
- Header = (Cat->GetIntCatInfo(Name, "Header", 0) != 0);
- Maxerr = Cat->GetIntCatInfo(Name, "Maxerr", 0);
- Accept = (Cat->GetIntCatInfo(Name, "Accept", 0) != 0);
- return false;
- } // end of DefineAM
-
-/***********************************************************************/
-/* GetTable: makes a new Table Description Block.
-   Chooses the file access class (MAPFAM, ZIPFAM or DOSFAM), then
-   the table class (TDBCSV, or TDBFMT when Fmtd is set), wrapped in
-   a TDBMUL when Multiple is set.
-   Returns NULL with a message in g->Message when the requested
-   compression is not supported. */
-/***********************************************************************/
-PTDB CSVDEF::GetTable(PGLOBAL g, MODE mode)
- {
- USETEMP tmp = PlgGetUser(g)->UseTemp;
- bool map = Mapped && mode != MODE_INSERT && // Mapping is not used for insert,
- !(tmp != TMP_NO && mode == MODE_UPDATE) && // nor for update unless UseTemp is TMP_NO,
- !(tmp == TMP_FORCE && // nor for update or delete with TMP_FORCE
- (mode == MODE_UPDATE || mode == MODE_DELETE));
- PTXF txfp;
- PTDBASE tdbp;
-
- /*********************************************************************/
- /* Allocate a file processing class of the proper type. */
- /*********************************************************************/
- if (map) {
- // Should be now compatible with UNIX
- txfp = new(g) MAPFAM(this);
- } else if (Compressed) {
-#if defined(ZIP_SUPPORT)
- if (Compressed == 1)
- txfp = new(g) ZIPFAM(this);
- else {
- strcpy(g->Message, "Compress 2 not supported yet");
-// txfp = new(g) ZLBFAM(defp);
- return NULL;
- } // endelse
-#else // !ZIP_SUPPORT
- strcpy(g->Message, "Compress not supported");
- return NULL;
-#endif // !ZIP_SUPPORT
- } else
- txfp = new(g) DOSFAM(this);
-
- /*********************************************************************/
- /* Allocate a TDB of the proper type. */
- /* Column blocks will be allocated only when needed. */
- /*********************************************************************/
- if (!Fmtd)
- tdbp = new(g) TDBCSV(this, txfp);
- else
- tdbp = new(g) TDBFMT(this, txfp);
-
- if (Multiple)
- tdbp = new(g) TDBMUL(tdbp);
-
- return tdbp;
- } // end of GetTable
-
-/* -------------------------- Class TDBCSV --------------------------- */
-
-/***********************************************************************/
-/* Implementation of the TDBCSV class.
-   Standard constructor: the field arrays are left unallocated and
-   the CSV options are copied from the table definition. */
-/***********************************************************************/
-TDBCSV::TDBCSV(PCSVDEF tdp, PTXF txfp) : TDBDOS(tdp, txfp)
- {
-#if defined(_DEBUG)
- assert (tdp);
-#endif
- Field = NULL; // NOTE(review): Fldtyp is not initialized here
- Offset = NULL;
- Fldlen = NULL;
- Fields = 0;
- Nerr = 0;
- Quoted = tdp->Quoted;
- Maxerr = tdp->Maxerr;
- Accept = tdp->Accept;
- Header = tdp->Header;
- Sep = tdp->GetSep();
- Qot = tdp->GetQot();
- } // end of TDBCSV standard constructor
-
-TDBCSV::TDBCSV(PGLOBAL g, PTDBCSV tdbp) : TDBDOS(g, tdbp) // Copy constructor: new arrays are sub-allocated from g
- {
- Fields = tdbp->Fields;
-
- if (Fields) {
- if (tdbp->Offset) // NOTE(review): Offset is not assigned when tdbp->Offset is NULL, yet it is tested below
- Offset = (int*)PlugSubAlloc(g, NULL, sizeof(int) * Fields);
-
- if (tdbp->Fldlen) // NOTE(review): same remark for Fldlen
- Fldlen = (int*)PlugSubAlloc(g, NULL, sizeof(int) * Fields);
-
- Field = (PSZ *)PlugSubAlloc(g, NULL, sizeof(PSZ) * Fields);
-
- for (int i = 0; i < Fields; i++) {
- if (Offset)
- Offset[i] = tdbp->Offset[i];
-
- if (Fldlen)
- Fldlen[i] = tdbp->Fldlen[i];
-
- if (Field) {
- assert (Fldlen);
- Field[i] = (PSZ)PlugSubAlloc(g, NULL, Fldlen[i] + 1); // New buffer; the text of tdbp->Field[i] is not copied
- Field[i][Fldlen[i]] = '\0';
- } // endif Field
-
- } // endfor i
-
- } else {
- Field = NULL;
- Offset = NULL;
- Fldlen = NULL;
- } // endif Fields
-
- Nerr = tdbp->Nerr; // NOTE(review): Fldtyp is not copied
- Maxerr = tdbp->Maxerr;
- Quoted = tdbp->Quoted;
- Accept = tdbp->Accept;
- Header = tdbp->Header;
- Sep = tdbp->Sep;
- Qot = tdbp->Qot;
- } // end of TDBCSV copy constructor
-
-// Method: makes a copy of this table and of its columns, each column copy being registered with NewPointer
-PTDB TDBCSV::CopyOne(PTABS t)
- {
- PTDB tp;
- PCSVCOL cp1, cp2;
- PGLOBAL g = t->G; // Is this really useful ???
-
- tp = new(g) TDBCSV(g, this);
-
- for (cp1 = (PCSVCOL)Columns; cp1; cp1 = (PCSVCOL)cp1->GetNext()) {
- cp2 = new(g) CSVCOL(cp1, tp); // Make a copy
- NewPointer(t, cp1, cp2); // Record the old/new column pair in t
- } // endfor cp1
-
- return tp;
- } // end of CopyOne
-
-/***********************************************************************/
-/* Allocate CSV column description block: returns a new CSVCOL made */
-/* from the column definition cdp for this table. */
-/***********************************************************************/
-PCOL TDBCSV::MakeCol(PGLOBAL g, PCOLDEF cdp, PCOL cprec, int n)
- {
- return new(g) CSVCOL(g, cdp, this, cprec, n);
- } // end of MakeCol
-
-/***********************************************************************/
-/* Check whether the number of errors is greater than the maximum. */
-/* Each call counts one more error (Nerr is incremented). */
-/***********************************************************************/
-bool TDBCSV::CheckErr(void)
- {
- return (++Nerr) > Maxerr;
- } // end of CheckErr
-
-/***********************************************************************/
-/* CSV EstimatedLength. Returns an estimated minimum line length.
-   When Fields is not set yet, it is computed here as the greatest
-   Fldnum of the non special columns, plus one. */
-/***********************************************************************/
-int TDBCSV::EstimatedLength(PGLOBAL g)
- {
- if (trace)
- htrc("EstimatedLength: Fields=%d Columns=%p\n", Fields, Columns);
-
- if (!Fields) {
- PCSVCOL colp;
-
- for (colp = (PCSVCOL)Columns; colp; colp = (PCSVCOL)colp->Next)
- if (!colp->IsSpecial()) // Not a pseudo column
- Fields = max(Fields, (int)colp->Fldnum);
-
- if (Columns)
- Fields++; // Fldnum was 0 based
-
- } // endif Fields
-
- return (int)Fields; // Number of separators if all fields are null
- } // end of Estimated Length
-
-#if 0
-/***********************************************************************/
-/* CSV tables favor the use of temporary files for Update. */
-/* (Disabled code, kept for reference.) */
-/***********************************************************************/
-bool TDBCSV::IsUsingTemp(PGLOBAL g)
- {
- USETEMP usetemp = PlgGetUser(g)->UseTemp;
-
- return (usetemp == TMP_YES || usetemp == TMP_FORCE ||
- (usetemp == TMP_AUTO && Mode == MODE_UPDATE));
- } // end of IsUsingTemp
-#endif // 0 (Same as TDBDOS one)
-
-/***********************************************************************/
-/* CSV Access Method opening routine. */
-/* First allocate the Offset and Fldlen arrays according to the */
-/* greatest field used in that query. Then call the DOS opening fnc.
-   For insert and update the Field and Fldtyp arrays are also
-   allocated, with one buffer per field to write.
-   Returns the result of TDBDOS::OpenDB. */
-/***********************************************************************/
-bool TDBCSV::OpenDB(PGLOBAL g)
- {
- bool rc = false;
- PCOLDEF cdp;
- PDOSDEF tdp = (PDOSDEF)To_Def;
-
- if (Use != USE_OPEN && (Columns || Mode == MODE_UPDATE)) {
- // Allocate the storage used to read (or write) records
- int i, len;
- PCSVCOL colp;
-
- if (!Fields) // May have been set in TABFMT::OpenDB
- if (Mode != MODE_UPDATE && Mode != MODE_INSERT) {
- for (colp = (PCSVCOL)Columns; colp; colp = (PCSVCOL)colp->Next)
- if (!colp->IsSpecial()) // Not a pseudo column
- Fields = max(Fields, (int)colp->Fldnum);
-
- if (Columns)
- Fields++; // Fldnum was 0 based
-
- } else // Update or insert: count all the defined columns
- for (cdp = tdp->GetCols(); cdp; cdp = cdp->GetNext())
- Fields++;
-
- Offset = (int*)PlugSubAlloc(g, NULL, sizeof(int) * Fields);
- Fldlen = (int*)PlugSubAlloc(g, NULL, sizeof(int) * Fields);
-
- if (Mode == MODE_INSERT || Mode == MODE_UPDATE) {
- Field = (PSZ*)PlugSubAlloc(g, NULL, sizeof(PSZ) * Fields);
- Fldtyp = (bool*)PlugSubAlloc(g, NULL, sizeof(bool) * Fields);
- } // endif Mode
-
- for (i = 0; i < Fields; i++) {
- Offset[i] = 0;
- Fldlen[i] = 0;
-
- if (Field) {
- Field[i] = NULL;
- Fldtyp[i] = false;
- } // endif Field
-
- } // endfor i
-
- if (Field)
- // Prepare writing fields
- if (Mode != MODE_UPDATE)
- for (colp = (PCSVCOL)Columns; colp; colp = (PCSVCOL)colp->Next) { // NOTE(review): special columns are not filtered here - confirm
- i = colp->Fldnum;
- len = colp->GetLength();
- Field[i] = (PSZ)PlugSubAlloc(g, NULL, len + 1);
- Field[i][len] = '\0';
- Fldlen[i] = len;
- Fldtyp[i] = IsTypeNum(colp->GetResultType());
- } // endfor colp
-
- else // MODE_UPDATE
- for (cdp = tdp->GetCols(); cdp; cdp = cdp->GetNext()) {
- i = cdp->GetOffset() - 1; // Offsets are 1 based
- len = cdp->GetLength();
- Field[i] = (PSZ)PlugSubAlloc(g, NULL, len + 1);
- Field[i][len] = '\0';
- Fldlen[i] = len;
- Fldtyp[i] = IsTypeNum(cdp->GetType());
- } // endfor cdp
-
- } // endif Use
-
- if (Header) {
- // Check that the Lrecl is at least equal to the header line length
- int headlen = 0;
- PCOLDEF cdp;
- PDOSDEF tdp = (PDOSDEF)To_Def;
-
- for (cdp = tdp->GetCols(); cdp; cdp = cdp->GetNext())
- headlen += strlen(cdp->GetName()) + 3; // 3 if names are quoted
-
- if (headlen > Lrecl) {
- Lrecl = headlen;
- Txfp->Lrecl = headlen;
- } // endif headlen
-
- } // endif Header
-
- Nerr = 0;
- rc = TDBDOS::OpenDB(g);
-
- if (!rc && Mode == MODE_UPDATE && To_Kindex)
- // Because KINDEX::Init is executed in mode READ, we must restore
- // the Fldlen array that was modified when reading the table file.
- for (cdp = tdp->GetCols(); cdp; cdp = cdp->GetNext())
- Fldlen[cdp->GetOffset() - 1] = cdp->GetLength();
-
- return rc;
- } // end of OpenDB
-
-/***********************************************************************/
-/* SkipHeader: Physically skip first header line if applicable. */
-/* This is called from TDBDOS::OpenDB and must be executed before */
-/* Kindex construction if the file is accessed using an index.
-   In insert mode on an empty file the header line is made from the
-   column names and written instead.
-   Returns true on error. */
-/***********************************************************************/
-bool TDBCSV::SkipHeader(PGLOBAL g)
- {
- int len = GetFileLength(g);
- bool rc = false;
-
-#if defined(_DEBUG)
- if (len < 0)
- return true;
-#endif // _DEBUG
-
- if (Header) {
- if (Mode == MODE_INSERT) {
- if (!len) {
- // New file, the header line must be constructed and written
- int i, n = 0;
- int hlen = 0;
- bool q = Qot && Quoted > 0; // Names are quoted in the header
- PCOLDEF cdp;
-
- // Estimate the length of the header list
- for (cdp = To_Def->GetCols(); cdp; cdp = cdp->GetNext()) {
- hlen += (1 + strlen(cdp->GetName()));
- hlen += ((q) ? 2 : 0);
- n++; // Calculate the number of columns
- } // endfor cdp
-
- if (hlen > Lrecl) {
- sprintf(g->Message, MSG(LRECL_TOO_SMALL), hlen);
- return true;
- } // endif hlen
-
- // File is empty, write a header record
- memset(To_Line, 0, Lrecl); // Zero filled so that strlen finds the current end of line below
-
- // The column order in the file is given by the offset value
- for (i = 1; i <= n; i++)
- for (cdp = To_Def->GetCols(); cdp; cdp = cdp->GetNext())
- if (cdp->GetOffset() == i) {
- if (q)
- To_Line[strlen(To_Line)] = Qot;
-
- strcat(To_Line, cdp->GetName());
-
- if (q)
- To_Line[strlen(To_Line)] = Qot;
-
- if (i < n)
- To_Line[strlen(To_Line)] = Sep;
-
- } // endif Offset
-
- rc = (Txfp->WriteBuffer(g) == RC_FX);
- } // endif !FileLength
-
- } else if (Mode == MODE_DELETE) {
- if (len)
- rc = (Txfp->SkipRecord(g, true) == RC_FX); // NOTE(review): meaning of the bool argument is not visible here
-
- } else if (len) // !Insert && !Delete
- rc = (Txfp->SkipRecord(g, false) == RC_FX || Txfp->RecordPos(g));
-
- } // endif Header
-
- return rc;
- } // end of SkipHeader
-
-/***********************************************************************/
-/* ReadBuffer: Physical read routine for the CSV access method.
-   Reads one line with Txfp->ReadBuffer, then sets Offset[i] and
-   Fldlen[i] for each of the Fields fields of the line. In update
-   mode the field text is copied into Field[i] instead.
-   Returns the read return code, RC_FX on a fatal error or RC_NF
-   for a bad line that is neither fatal nor accepted. */
-/***********************************************************************/
-int TDBCSV::ReadBuffer(PGLOBAL g)
- {
- char *p1, *p2, *p = NULL;
- int i, n, len, rc = Txfp->ReadBuffer(g);
- bool bad = false; // Set when a bad line is accepted: remaining fields get length 0
-
- if (trace > 1)
- htrc("CSV: Row is '%s' rc=%d\n", To_Line, rc);
-
- if (rc != RC_OK || !Fields)
- return rc;
- else
- p2 = To_Line;
-
- // Find the offsets and lengths of the columns for this row
- for (i = 0; i < Fields; i++) {
- if (!bad) {
- if (Qot && *p2 == Qot) { // Quoted field
- for (n = 0, p1 = ++p2; (p = strchr(p1, Qot)); p1 = p + 2)
- if (*(p + 1) == Qot)
- n++; // Doubled internal quotes
- else
- break; // Final quote
-
- if (p) {
- len = p++ - p2; // Length between the quotes; p now follows the final quote
-
-// if (Sep != ' ')
-// for (; *p == ' '; p++) ; // Skip blanks
-
- if (*p != Sep && i != Fields - 1) { // Should be the separator
- if (CheckErr()) {
- sprintf(g->Message, MSG(MISSING_FIELD),
- i+1, Name, RowNumber(g));
- return RC_FX;
- } else if (Accept)
- bad = true;
- else
- return RC_NF;
-
- } // endif p
-
- if (n) {
- int j, k;
-
- // Suppress the double of internal quotes
- for (j = k = 0; j < len; j++, k++) {
- if (p2[j] == Qot)
- j++; // skip first one
-
- p2[k] = p2[j];
- } // endfor j, k
-
- len -= n;
- } // endif n
-
- } else if (CheckErr()) { // No final quote was found
- sprintf(g->Message, MSG(BAD_QUOTE_FIELD),
- Name, i+1, RowNumber(g));
- return RC_FX;
- } else if (Accept) {
- len = strlen(p2);
- bad = true;
- } else
- return RC_NF;
-
- } else if ((p = strchr(p2, Sep)))
- len = p - p2;
- else if (i == Fields - 1)
- len = strlen(p2); // Last field extends to the end of line
- else if (Accept && Maxerr == 0) {
- len = strlen(p2); // Missing fields are accepted without counting an error
- bad = true;
- } else if (CheckErr()) {
- sprintf(g->Message, MSG(MISSING_FIELD), i+1, Name, RowNumber(g));
- return RC_FX;
- } else if (Accept) {
- len = strlen(p2);
- bad = true;
- } else
- return RC_NF;
-
- } else
- len = 0;
-
- Offset[i] = p2 - To_Line;
-
- if (Mode != MODE_UPDATE)
- Fldlen[i] = len;
- else if (len > Fldlen[i]) {
- sprintf(g->Message, MSG(FIELD_TOO_LONG), i+1, RowNumber(g));
- return RC_FX;
- } else {
- strncpy(Field[i], p2, len);
- Field[i][len] = '\0';
- } // endif Mode
-
- if (p)
- p2 = p + 1; // Next field begins after the separator
-
- } // endfor i
-
- return rc;
- } // end of ReadBuffer
-
-/***********************************************************************/
-/* Data Base write routine CSV file access method.
-   Makes the line in To_Line from the Field array, quoting the
-   fields as required, then writes it with Txfp->WriteBuffer.
-   Returns RC_FX when CheckWrite fails, else the write result. */
-/***********************************************************************/
-int TDBCSV::WriteDB(PGLOBAL g)
- {
- char sep[2], qot[2];
- int i, nlen, oldlen = strlen(To_Line); // Length of the line before it is rewritten
-
- if (trace > 1)
- htrc("CSV WriteDB: R%d Mode=%d key=%p link=%p\n",
- Tdb_No, Mode, To_Key_Col, To_Link);
-
- // Before writing the line we must check its length
- if ((nlen = CheckWrite(g)) < 0)
- return RC_FX;
-
- // Before writing the line we must make it
- sep[0] = Sep;
- sep[1] = '\0';
- qot[0] = Qot;
- qot[1] = '\0';
- *To_Line = '\0';
-
- for (i = 0; i < Fields; i++) {
- if (i)
- strcat(To_Line, sep);
-
- if (Field[i])
- if (!strlen(Field[i])) {
- // Generally null fields are not quoted
- if (Quoted > 2)
- // Except if explicitly required
- strcat(strcat(To_Line, qot), qot);
-
- } else if (Qot && (strchr(Field[i], Sep) || *Field[i] == Qot
- || Quoted > 1 || (Quoted == 1 && !Fldtyp[i])))
- if (strchr(Field[i], Qot)) {
- // Field contains quotes that must be doubled
- int j, k = strlen(To_Line), n = strlen(Field[i]);
-
- To_Line[k++] = Qot;
-
- for (j = 0; j < n; j++) {
- if (Field[i][j] == Qot)
- To_Line[k++] = Qot;
-
- To_Line[k++] = Field[i][j];
- } // endfor j
-
- To_Line[k++] = Qot;
- To_Line[k] = '\0';
- } else
- strcat(strcat(strcat(To_Line, qot), Field[i]), qot);
-
- else // Field written as is, not quoted
- strcat(To_Line, Field[i]);
-
- } // endfor i
-
-#if defined(_DEBUG)
- assert ((unsigned)nlen == strlen(To_Line));
-#endif
-
- if (Mode == MODE_UPDATE && nlen < oldlen
- && !((PDOSFAM)Txfp)->GetUseTemp()) {
- // In Update mode with no temp file, line length must not change
- To_Line[nlen] = Sep; // The line is padded with a separator followed by blanks
-
- for (nlen++; nlen < oldlen; nlen++)
- To_Line[nlen] = ' ';
-
- To_Line[nlen] = '\0';
- } // endif
-
- if (trace > 1)
- htrc("Write: line is=%s", To_Line);
-
- /*********************************************************************/
- /* Now start the writing process. */
- /*********************************************************************/
- return Txfp->WriteBuffer(g);
- } // end of WriteDB
-
-/***********************************************************************/
-/* Check whether a new line fits in the file lrecl size.
-   Returns the length the line will have once made, or -1 (with a
-   message in g->Message) when the line is too long or when a field
-   needs quoting while there is no quoting character. */
-/***********************************************************************/
-int TDBCSV::CheckWrite(PGLOBAL g)
- {
- int maxlen, n, nlen = (Fields - 1); // Start with the number of separators
-
- if (trace > 1)
- htrc("CheckWrite: R%d Mode=%d\n", Tdb_No, Mode);
-
- // Before writing the line we must check its length
- maxlen = (Mode == MODE_UPDATE && !Txfp->GetUseTemp())
- ? strlen(To_Line) : Lrecl; // Update in place cannot exceed the current line length
-
- // Check whether record is too long
- for (int i = 0; i < Fields; i++)
- if (Field[i]) {
- if (!(n = strlen(Field[i])))
- n += (Quoted > 2 ? 2 : 0); // Null field written as two quotes
- else if (strchr(Field[i], Sep) || (Qot && *Field[i] == Qot)
- || Quoted > 1 || (Quoted == 1 && !Fldtyp[i]))
- if (!Qot) {
- sprintf(g->Message, MSG(SEP_IN_FIELD), i + 1);
- return -1;
- } else {
- // Quotes inside a quoted field must be doubled
- char *p1, *p2;
-
- for (p1 = Field[i]; (p2 = strchr(p1, Qot)); p1 = p2 + 1)
- n++;
-
- n += 2; // Outside quotes
- } // endif
-
- if ((nlen += n) > maxlen) {
- strcpy(g->Message, MSG(LINE_TOO_LONG));
- return -1;
- } // endif nlen
-
- } // endif Field
-
- return nlen;
- } // end of CheckWrite
-
-/* ------------------------------------------------------------------- */
-
-/***********************************************************************/
-/* Implementation of the TDBFMT class.
-   Copy constructor: the FldFormat, To_Fld and FmtTest pointers are
-   copied as is, so the copy refers to the same storage as tdbp. */
-/***********************************************************************/
-TDBFMT::TDBFMT(PGLOBAL g, PTDBFMT tdbp) : TDBCSV(g, tdbp)
- {
- FldFormat = tdbp->FldFormat;
- To_Fld = tdbp->To_Fld;
- FmtTest = tdbp->FmtTest;
- Linenum = tdbp->Linenum;
- } // end of TDBFMT copy constructor
-
-// Method: makes a copy of this table and of its columns, each column copy being registered with NewPointer
-PTDB TDBFMT::CopyOne(PTABS t)
- {
- PTDB tp;
- PCSVCOL cp1, cp2;
-//PFMTCOL cp1, cp2;
- PGLOBAL g = t->G; // Is this really useful ???
-
- tp = new(g) TDBFMT(g, this);
-
- for (cp1 = (PCSVCOL)Columns; cp1; cp1 = (PCSVCOL)cp1->GetNext()) {
-//for (cp1 = (PFMTCOL)Columns; cp1; cp1 = (PFMTCOL)cp1->GetNext()) {
- cp2 = new(g) CSVCOL(cp1, tp); // Make a copy
-// cp2 = new(g) FMTCOL(cp1, tp); // Make a copy
- NewPointer(t, cp1, cp2); // Record the old/new column pair in t
- } // endfor cp1
-
- return tp;
- } // end of CopyOne
-
-/***********************************************************************/
-/* Allocate FMT column description block. */
-/* FMT tables use the CSVCOL column class. */
-/***********************************************************************/
-PCOL TDBFMT::MakeCol(PGLOBAL g, PCOLDEF cdp, PCOL cprec, int n)
- {
- return new(g) CSVCOL(g, cdp, this, cprec, n);
-//return new(g) FMTCOL(cdp, this, cprec, n);
- } // end of MakeCol
-
-/***********************************************************************/
-/* FMT EstimatedLength. Returns an estimated minimum line length. */
-/* The big problem here is how can we estimate that minimum? */
-/***********************************************************************/
-int TDBFMT::EstimatedLength(PGLOBAL g)
- {
- // Arbitrary guess: the line ending length plus one tenth of Lrecl, plus one
- return ((PDOSDEF)To_Def)->GetEnding() + (int)((Lrecl / 10) + 1);
- } // end of EstimatedLength
-
-/***********************************************************************/
-/* FMT Access Method opening routine.
-   Makes, for each field, the sscanf format used to read it from
-   the format given in the column definition, then calls the CSV
-   opening routine.
-   Returns true on error (missing or too short field format). */
-/***********************************************************************/
-bool TDBFMT::OpenDB(PGLOBAL g)
- {
- Linenum = 0;
-
- if (Use != USE_OPEN && (Columns || Mode == MODE_UPDATE)) {
- // Make the formats used to read records
- PSZ pfm;
- int i, n;
- PCSVCOL colp;
- PCOLDEF cdp;
- PDOSDEF tdp = (PDOSDEF)To_Def;
-
-// if (Mode != MODE_UPDATE) {
- for (colp = (PCSVCOL)Columns; colp; colp = (PCSVCOL)colp->Next)
- if (!colp->IsSpecial()) // Not a pseudo column
- Fields = max(Fields, (int)colp->Fldnum);
-
- if (Columns)
- Fields++; // Fldnum was 0 based
-
-// } else
-// for (cdp = tdp->GetCols(); cdp; cdp = cdp->GetNext())
-// Fields++;
-
- To_Fld = PlugSubAlloc(g, NULL, Lrecl + 1);
- FldFormat = (PSZ*)PlugSubAlloc(g, NULL, sizeof(PSZ) * Fields);
- memset(FldFormat, 0, sizeof(PSZ) * Fields);
- FmtTest = (int*)PlugSubAlloc(g, NULL, sizeof(int) * Fields);
- memset(FmtTest, 0, sizeof(int) * Fields);
-
- // Get the column formats
- for (cdp = tdp->GetCols(); cdp; cdp = cdp->GetNext())
- if ((i = cdp->GetOffset() - 1) < Fields) {
- if (!(pfm = cdp->GetFmt())) {
- sprintf(g->Message, MSG(NO_FLD_FORMAT), i + 1, Name);
- return true;
- } // endif pfm
-
- // Roughly check the Fmt format
- if ((n = strlen(pfm) - 2) < 4) { // n is the position of the last two characters of the format
- sprintf(g->Message, MSG(BAD_FLD_FORMAT), i + 1, Name);
- return true;
- } // endif n
-
- FldFormat[i] = (PSZ)PlugSubAlloc(g, NULL, n + 5); // Room for the format, an added "%n" and the final null
- strcpy(FldFormat[i], pfm);
-
- if (!strcmp(pfm + n, "%m")) {
- // This is a field that can be missing. Flag it so it can
- // be handled with special processing.
- FldFormat[i][n+1] = 'n'; // To have sscanf normal processing
- FmtTest[i] = 2;
- } else if (i+1 < Fields && strcmp(pfm + n, "%n")) {
- // There are trailing characters after the field contents
- // add a marker for the next field start position.
- strcat(FldFormat[i], "%n");
- FmtTest[i] = 1;
- } // endif's
-
- } // endif i
-
- } // endif Use
-
- return TDBCSV::OpenDB(g);
- } // end of OpenDB
-
-/***********************************************************************/
-/* ReadBuffer: Physical read routine for the FMT access method. */
-/***********************************************************************/
-int TDBFMT::ReadBuffer(PGLOBAL g)
- {
- int i, len, n, deb, fin, nwp, pos = 0, rc;
- bool bad = false;
-
- if ((rc = Txfp->ReadBuffer(g)) != RC_OK || !Fields)
- return rc;
- else
- ++Linenum;
-
- if (trace > 1)
- htrc("FMT: Row %d is '%s' rc=%d\n", Linenum, To_Line, rc);
-
- // Find the offsets and lengths of the columns for this row
- for (i = 0; i < Fields; i++) {
- if (!bad) {
- deb = fin = -1;
-
- if (!FldFormat[i]) {
- n = 0;
- } else if (FmtTest[i] == 1) {
- nwp = -1;
- n = sscanf(To_Line + pos, FldFormat[i], &deb, To_Fld, &fin, &nwp);
- } else {
- n = sscanf(To_Line + pos, FldFormat[i], &deb, To_Fld, &fin);
-
- if (n != 1 && (deb >= 0 || i == Fields - 1) && FmtTest[i] == 2) {
- // Missing optional field, not an error
- n = 1;
-
- if (i == Fields - 1)
- fin = deb = 0;
- else
- fin = deb;
-
- } // endif n
-
- nwp = fin;
- } // endif i
-
- if (n != 1 || deb < 0 || fin < 0 || nwp < 0) {
- // This is to avoid a very strange sscanf bug occuring
- // with fields that ends with a null character.
- // This bug causes subsequent sscanf to return in error,
- // so next lines are not parsed correctly.
- sscanf("a", "%*c"); // Seems to reset things Ok
-
- if (CheckErr()) {
- sprintf(g->Message, MSG(BAD_LINEFLD_FMT), Linenum, i + 1, Name);
- return RC_FX;
- } else if (Accept)
- bad = true;
- else
- return RC_NF;
-
- } // endif n...
-
- } // endif !bad
-
- if (!bad) {
- Offset[i] = pos + deb;
- len = fin - deb;
- } else {
- nwp = 0;
- Offset[i] = pos;
- len = 0;
- } // endif bad
-
-// if (Mode != MODE_UPDATE)
- Fldlen[i] = len;
-// else if (len > Fldlen[i]) {
-// sprintf(g->Message, MSG(FIELD_TOO_LONG), i+1, To_Tdb->RowNumber(g));
-// return RC_FX;
-// } else {
-// strncpy(Field[i], To_Line + pos, len);
-// Field[i][len] = '\0';
-// } // endif Mode
-
- pos += nwp;
- } // endfor i
-
- if (bad)
- Nerr++;
- else
- sscanf("a", "%*c"); // Seems to reset things Ok
-
- return rc;
- } // end of ReadBuffer
-
-/***********************************************************************/
-/* Data Base write routine FMT file access method. */
-/***********************************************************************/
-int TDBFMT::WriteDB(PGLOBAL g)
- {
- sprintf(g->Message, MSG(FMT_WRITE_NIY), "FMT");
- return RC_FX; // NIY
- } // end of WriteDB
-
-// ------------------------ CSVCOL functions ----------------------------
-
-/***********************************************************************/
-/* CSVCOL public constructor */
-/***********************************************************************/
-CSVCOL::CSVCOL(PGLOBAL g, PCOLDEF cdp, PTDB tdbp, PCOL cprec, int i)
- : DOSCOL(g, cdp, tdbp, cprec, i, "CSV")
- {
- Fldnum = Deplac - 1;
- Deplac = 0;
- } // end of CSVCOL constructor
-
-/***********************************************************************/
-/* CSVCOL constructor used for copying columns. */
-/* tdbp is the pointer to the new table descriptor. */
-/***********************************************************************/
-CSVCOL::CSVCOL(CSVCOL *col1, PTDB tdbp) : DOSCOL(col1, tdbp)
- {
- Fldnum = col1->Fldnum;
- } // end of CSVCOL copy constructor
-
-/***********************************************************************/
-/* VarSize: This function tells UpdateDB whether or not the block */
-/* optimization file must be redone if this column is updated, even */
-/* it is not sorted or clustered. This applies to a blocked table, */
-/* because if it is updated using a temporary file, the block size */
-/* may be modified. */
-/***********************************************************************/
-bool CSVCOL::VarSize(void)
- {
- PTXF txfp = ((PTDBCSV)To_Tdb)->Txfp;
-
- if (txfp->IsBlocked() && txfp->GetUseTemp())
- // Blocked table using a temporary file
- return true;
- else
- return false;
-
- } // end VarSize
-
-/***********************************************************************/
-/* ReadColumn: call DOSCOL::ReadColumn after having set the offet */
-/* and length of the field to read as calculated by TDBCSV::ReadDB. */
-/***********************************************************************/
-void CSVCOL::ReadColumn(PGLOBAL g)
- {
- int rc;
- PTDBCSV tdbp = (PTDBCSV)To_Tdb;
-
- /*********************************************************************/
- /* If physical reading of the line was deferred, do it now. */
- /*********************************************************************/
- if (!tdbp->IsRead())
- if ((rc = tdbp->ReadBuffer(g)) != RC_OK) {
- if (rc == RC_EF)
- sprintf(g->Message, MSG(INV_DEF_READ), rc);
-
- longjmp(g->jumper[g->jump_level], 34);
- } // endif
-
- if (tdbp->Mode != MODE_UPDATE) {
- int colen = Long; // Column length
-
- // Set the field offset and length for this row
- Deplac = tdbp->Offset[Fldnum]; // Field offset
- Long = tdbp->Fldlen[Fldnum]; // Field length
-
- if (trace > 1)
- htrc("CSV ReadColumn %s Fldnum=%d offset=%d fldlen=%d\n",
- Name, Fldnum, Deplac, Long);
-
- if (Long > colen && tdbp->CheckErr()) {
- Long = colen; // Restore column length
- sprintf(g->Message, MSG(FLD_TOO_LNG_FOR),
- Fldnum + 1, Name, To_Tdb->RowNumber(g), tdbp->GetFile(g));
- longjmp(g->jumper[g->jump_level], 34);
- } // endif Long
-
- // Now do the reading
- DOSCOL::ReadColumn(g);
-
- // Restore column length
- Long = colen;
- } else { // Mode Update
- // Field have been copied in TDB Field array
- PSZ fp = tdbp->Field[Fldnum];
-
- Value->SetValue_psz(fp);
- } // endif Mode
-
- } // end of ReadColumn
-
-/***********************************************************************/
-/* WriteColumn: The column is written in TDBCSV matching Field. */
-/***********************************************************************/
-void CSVCOL::WriteColumn(PGLOBAL g)
- {
- char *p, buf[32];
- int flen;
- PTDBCSV tdbp = (PTDBCSV)To_Tdb;
-
- if (trace > 1)
- htrc("CSV WriteColumn: col %s R%d coluse=%.4X status=%.4X\n",
- Name, tdbp->GetTdb_No(), ColUse, Status);
-
- flen = GetLength();
-
- if (trace > 1)
- htrc("Lrecl=%d Long=%d field=%d coltype=%d colval=%p\n",
- tdbp->Lrecl, Long, flen, Buf_Type, Value);
-
- /*********************************************************************/
- /* Check whether the new value has to be converted to Buf_Type. */
- /*********************************************************************/
- if (Value != To_Val)
- Value->SetValue_pval(To_Val, false); // Convert the updated value
-
- /*********************************************************************/
- /* Get the string representation of the column value. */
- /*********************************************************************/
- p = Value->ShowValue(buf);
-
- if (trace > 1)
- htrc("new length(%p)=%d\n", p, strlen(p));
-
- if ((signed)strlen(p) > flen) {
- sprintf(g->Message, MSG(BAD_FLD_LENGTH), Name, p, flen);
- longjmp(g->jumper[g->jump_level], 34);
- } // endif
-
- if (trace > 1)
- htrc("buffer=%s\n", p);
-
- /*********************************************************************/
- /* Updating must be done also during the first pass so writing the */
- /* updated record can be checked for acceptable record length. */
- /*********************************************************************/
- if (Fldnum < 0) {
- // This can happen for wrong offset value in XDB files
- sprintf(g->Message, MSG(BAD_FIELD_RANK), Fldnum + 1, Name);
- longjmp(g->jumper[g->jump_level], 34);
- } else
- strncpy(tdbp->Field[Fldnum], p, flen);
-
- if (trace > 1)
- htrc(" col written: '%s'\n", p);
-
- } // end of WriteColumn
-
-/* ------------------------ End of TabFmt ---------------------------- */
+/************* TabFmt C++ Program Source Code File (.CPP) **************/ +/* PROGRAM NAME: TABFMT */ +/* ------------- */ +/* Version 3.7 */ +/* */ +/* COPYRIGHT: */ +/* ---------- */ +/* (C) Copyright to the author Olivier BERTRAND 2001 - 2013 */ +/* */ +/* WHAT THIS PROGRAM DOES: */ +/* ----------------------- */ +/* This program are the TABFMT classes DB execution routines. */ +/* The base class CSV is comma separated files. */ +/* FMT (Formatted) files are those having a complex internal record */ +/* format described in the Format keyword of their definition. */ +/***********************************************************************/ + +/***********************************************************************/ +/* Include relevant MariaDB header file. */ +/***********************************************************************/ +#include "my_global.h" + +#if defined(WIN32) +#include <io.h> +#include <fcntl.h> +#include <errno.h> +#include <locale.h> +#if defined(__BORLANDC__) +#define __MFC_COMPAT__ // To define min/max as macro +#endif +//#include <windows.h> +#include "osutil.h" +#else +#if defined(UNIX) +#include <errno.h> +#include <unistd.h> +#include "osutil.h" +#else +#include <io.h> +#endif +#include <fcntl.h> +#endif + +/***********************************************************************/ +/* Include application header files: */ +/* global.h is header containing all global declarations. */ +/* plgdbsem.h is header containing the DB application declarations. */ +/* tabdos.h is header containing the TABDOS class declarations. 
*/ +/***********************************************************************/ +#include "global.h" +#include "plgdbsem.h" +#include "filamap.h" +#if defined(ZIP_SUPPORT) +#include "filamzip.h" +#endif // ZIP_SUPPORT +#include "tabfmt.h" +#include "tabmul.h" +#define NO_FUNC +#include "plgcnx.h" // For DB types +#include "resource.h" + +/***********************************************************************/ +/* This should be an option. */ +/***********************************************************************/ +#define MAXCOL 200 /* Default max column nb in result */ +#define TYPE_UNKNOWN 10 /* Must be greater than other types */ + +extern "C" int trace; + +/***********************************************************************/ +/* CSV Catalog utility functions. */ +/***********************************************************************/ +PQRYRES PlgAllocResult(PGLOBAL, int, int, int, int *, int *, + unsigned int *, bool blank = true, bool nonull = false); + +/***********************************************************************/ +/* CSVColumns: constructs the result blocks containing the description */ +/* of all the columns of a CSV file that will be retrieved by #GetData.*/ +/* Note: the algorithm to set the type is based on the internal values */ +/* of types (TYPE_STRING < TYPE_FLOAT < TYPE_INT) (1 < 2 < 7). */ +/* If these values are changed, this will have to be revisited. 
*/ +/***********************************************************************/ +PQRYRES CSVColumns(PGLOBAL g, char *fn, char sep, char q, int hdr, int mxr) + { + static int dbtype[] = {DB_CHAR, DB_SHORT, DB_CHAR, + DB_INT, DB_INT, DB_SHORT}; + static int buftyp[] = {TYPE_STRING, TYPE_SHORT, TYPE_STRING, + TYPE_INT, TYPE_INT, TYPE_SHORT}; + static unsigned int length[] = {6, 6, 8, 10, 10, 6}; + char *p, *colname[MAXCOL], dechar, filename[_MAX_PATH], buf[4096]; + int i, imax, hmax, n, nerr, phase, blank, digit, dec, type; + int ncol = sizeof(dbtype) / sizeof(int); + int num_read = 0, num_max = 10000000; // Statistics + int len[MAXCOL], typ[MAXCOL], prc[MAXCOL]; + FILE *infile; + PQRYRES qrp; + PCOLRES crp; + +// num_max = atoi(p+1); // Max num of record to test +#if defined(WIN32) + if (strnicmp(setlocale(LC_NUMERIC, NULL), "French", 6)) + dechar = '.'; + else + dechar = ','; +#else // !WIN32 + dechar = '.'; +#endif // !WIN32 + + if (trace) + htrc("File %s sep=%c q=%c hdr=%d mxr=%d\n", + SVP(fn), sep, q, hdr, mxr); + + if (!fn) { + strcpy(g->Message, MSG(MISSING_FNAME)); + return NULL; + } // endif fn + + imax = hmax = nerr = 0; + mxr = max(0, mxr); + + for (i = 0; i < MAXCOL; i++) { + colname[i] = NULL; + len[i] = 0; + typ[i] = TYPE_UNKNOWN; + prc[i] = 0; + } // endfor i + + /*********************************************************************/ + /* Open the input file. */ + /*********************************************************************/ + PlugSetPath(filename, fn, PlgGetDataPath(g)); + + if (!(infile= global_fopen(g, MSGID_CANNOT_OPEN, filename, "r"))) + return NULL; + + if (hdr) { + /*******************************************************************/ + /* Make the column names from the first line. 
*/ + /*******************************************************************/ + phase = 0; + + if (fgets(buf, sizeof(buf), infile)) { + n = strlen(buf) + 1; + buf[n - 2] = '\0'; +#if defined(UNIX) + // The file can be imported from Windows + if (buf[n - 3] == '\r') + buf[n - 3] = 0; +#endif // UNIX + p = (char*)PlugSubAlloc(g, NULL, n); + memcpy(p, buf, n); + + //skip leading blanks + for (; *p == ' '; p++) ; + + if (q && *p == q) { + // Header is quoted + p++; + phase = 1; + } // endif q + + colname[0] = p; + } else { + sprintf(g->Message, MSG(FILE_IS_EMPTY), fn); + goto err; + } // endif's + + for (i = 1; *p; p++) + if (phase == 1 && *p == q) { + *p = '\0'; + phase = 0; + } else if (*p == sep && !phase) { + *p = '\0'; + + //skip leading blanks + for (; *(p+1) == ' '; p++) ; + + if (q && *(p+1) == q) { + // Header is quoted + p++; + phase = 1; + } // endif q + + colname[i++] = p + 1; + } // endif sep + + num_read++; + imax = hmax = i; + + for (i = 0; i < hmax; i++) + length[0] = max(length[0], strlen(colname[i])); + + } // endif hdr + + for (num_read++; num_read <= num_max; num_read++) { + /*******************************************************************/ + /* Now start the reading process. Read one line. */ + /*******************************************************************/ + if (fgets(buf, sizeof(buf), infile)) { + n = strlen(buf); + buf[n - 1] = '\0'; +#if defined(UNIX) + // The file can be imported from Windows + if (buf[n - 2] == '\r') + buf[n - 2] = 0; +#endif // UNIX + } else if (feof(infile)) { + sprintf(g->Message, MSG(EOF_AFTER_LINE), num_read -1); + break; + } else { + sprintf(g->Message, MSG(ERR_READING_REC), num_read, fn); + goto err; + } // endif's + + /*******************************************************************/ + /* Make the test for field lengths. 
*/ + /*******************************************************************/ + i = n = phase = blank = digit = dec = 0; + + for (p = buf; *p; p++) + if (*p == sep) { + if (phase != 1) { + if (i == MAXCOL - 1) { + sprintf(g->Message, MSG(TOO_MANY_FIELDS), num_read, fn); + goto err; + } // endif i + + if (n) { + len[i] = max(len[i], n); + type = (digit || (dec && n == 1)) ? TYPE_STRING + : (dec) ? TYPE_FLOAT : TYPE_INT; + typ[i] = min(type, typ[i]); + prc[i] = max((typ[i] == TYPE_FLOAT) ? (dec - 1) : 0, prc[i]); + } // endif n + + i++; + n = phase = blank = digit = dec = 0; + } else // phase == 1 + n++; + + } else if (*p == ' ') { + if (phase < 2) + n++; + + if (blank) + digit = 1; + + } else if (*p == q) { + if (phase == 0) { + if (blank) + if (++nerr > mxr) { + sprintf(g->Message, MSG(MISPLACED_QUOTE), num_read); + goto err; + } else + goto skip; + + n = 0; + phase = digit = 1; + } else if (phase == 1) { + if (*(p+1) == q) { + // This is currently not implemented for CSV tables +// if (++nerr > mxr) { +// sprintf(g->Message, MSG(QUOTE_IN_QUOTE), num_read); +// goto err; +// } else +// goto skip; + + p++; + n++; + } else + phase = 2; + + } else if (++nerr > mxr) { // phase == 2 + sprintf(g->Message, MSG(MISPLACED_QUOTE), num_read); + goto err; + } else + goto skip; + + } else { + if (phase == 2) + if (++nerr > mxr) { + sprintf(g->Message, MSG(MISPLACED_QUOTE), num_read); + goto err; + } else + goto skip; + + // isdigit cannot be used here because of debug assert + if (!strchr("0123456789", *p)) { + if (!digit && *p == dechar) + dec = 1; // Decimal point found + else if (blank || !(*p == '-' || *p == '+')) + digit = 1; + + } else if (dec) + dec++; // More decimals + + n++; + blank = 1; + } // endif's *p + + if (phase == 1) + if (++nerr > mxr) { + sprintf(g->Message, MSG(UNBALANCE_QUOTE), num_read); + goto err; + } else + goto skip; + + if (n) { + len[i] = max(len[i], n); + type = (digit || n == 0 || (dec && n == 1)) ? TYPE_STRING + : (dec) ? 
TYPE_FLOAT : TYPE_INT; + typ[i] = min(type, typ[i]); + prc[i] = max((typ[i] == TYPE_FLOAT) ? (dec - 1) : 0, prc[i]); + } // endif n + + imax = max(imax, i+1); + skip: ; // Skip erroneous line + } // endfor num_read + + if (trace) { + htrc("imax=%d Lengths:", imax); + + for (i = 0; i < imax; i++) + htrc(" %d", len[i]); + + htrc("\n"); + } // endif trace + + fclose(infile); + + if (trace) + htrc("CSVColumns: imax=%d hmax=%d len=%d\n", + imax, hmax, length[0]); + + /*********************************************************************/ + /* Allocate the structures used to refer to the result set. */ + /*********************************************************************/ + qrp = PlgAllocResult(g, ncol, imax, IDS_COLUMNS + 3, + dbtype, buftyp, length); + qrp->Nblin = imax; + + /*********************************************************************/ + /* Now get the results into blocks. */ + /*********************************************************************/ + for (i = 0; i < imax; i++) { + if (i >= hmax) { + sprintf(buf, "COL%.3d", i+1); + p = buf; + } else + p = colname[i]; + + if (typ[i] == TYPE_UNKNOWN) // Void column + typ[i] = TYPE_STRING; + + crp = qrp->Colresp; // Column Name + crp->Kdata->SetValue(p, i); + crp = crp->Next; // Data Type + crp->Kdata->SetValue(typ[i], i); + crp = crp->Next; // Type Name + crp->Kdata->SetValue(GetTypeName(typ[i]), i); + crp = crp->Next; // Precision + crp->Kdata->SetValue(len[i], i); + crp = crp->Next; // Length + crp->Kdata->SetValue(len[i], i); + crp = crp->Next; // Scale (precision) + crp->Kdata->SetValue(prc[i], i); + } // endfor i + + /*********************************************************************/ + /* Return the result pointer for use by GetData routines. 
*/ + /*********************************************************************/ + return qrp; + + err: + fclose(infile); + return NULL; + } // end of CSVCColumns + +/* --------------------------- Class CSVDEF -------------------------- */ + +/***********************************************************************/ +/* CSVDEF constructor. */ +/***********************************************************************/ +CSVDEF::CSVDEF(void) + { + Fmtd = Accept = Header = false; + Maxerr = 0; + Quoted = -1; + Sep = ','; + Qot = '\0'; + } // end of CSVDEF constructor + +/***********************************************************************/ +/* DefineAM: define specific AM block values from XDB file. */ +/***********************************************************************/ +bool CSVDEF::DefineAM(PGLOBAL g, LPCSTR am, int poff) + { + char buf[8]; + + // Double check correctness of offset values + for (PCOLDEF cdp = To_Cols; cdp; cdp = cdp->GetNext()) + if (cdp->GetOffset() < 1) { + strcpy(g->Message, MSG(BAD_OFFSET_VAL)); + return true; + } // endif Offset + + // Call DOSDEF DefineAM with am=CSV so FMT is not confused with FIX + if (DOSDEF::DefineAM(g, "CSV", poff)) + return true; + + Cat->GetCharCatInfo(Name, "Separator", ",", buf, sizeof(buf)); + Sep = (strlen(buf) == 2 && buf[0] == '\\' && buf[1] == 't') ? '\t' : *buf; + Quoted = Cat->GetIntCatInfo(Name, "Quoted", -1); + Cat->GetCharCatInfo(Name, "Qchar", "", buf, sizeof(buf)); + Qot = *buf; + + if (Qot && Quoted < 0) + Quoted = 0; + else if (!Qot && Quoted >= 0) + Qot = '"'; + + Fmtd = (!Sep || (am && (*am == 'F' || *am == 'f'))); + Header = (Cat->GetIntCatInfo(Name, "Header", 0) != 0); + Maxerr = Cat->GetIntCatInfo(Name, "Maxerr", 0); + Accept = (Cat->GetIntCatInfo(Name, "Accept", 0) != 0); + return false; + } // end of DefineAM + +/***********************************************************************/ +/* GetTable: makes a new Table Description Block. 
*/ +/***********************************************************************/ +PTDB CSVDEF::GetTable(PGLOBAL g, MODE mode) + { + USETEMP tmp = PlgGetUser(g)->UseTemp; + bool map = Mapped && mode != MODE_INSERT && + !(tmp != TMP_NO && mode == MODE_UPDATE) && + !(tmp == TMP_FORCE && + (mode == MODE_UPDATE || mode == MODE_DELETE)); + PTXF txfp; + PTDBASE tdbp; + + /*********************************************************************/ + /* Allocate a file processing class of the proper type. */ + /*********************************************************************/ + if (map) { + // Should be now compatible with UNIX + txfp = new(g) MAPFAM(this); + } else if (Compressed) { +#if defined(ZIP_SUPPORT) + if (Compressed == 1) + txfp = new(g) ZIPFAM(this); + else { + strcpy(g->Message, "Compress 2 not supported yet"); +// txfp = new(g) ZLBFAM(defp); + return NULL; + } // endelse +#else // !ZIP_SUPPORT + strcpy(g->Message, "Compress not supported"); + return NULL; +#endif // !ZIP_SUPPORT + } else + txfp = new(g) DOSFAM(this); + + /*********************************************************************/ + /* Allocate a TDB of the proper type. */ + /* Column blocks will be allocated only when needed. */ + /*********************************************************************/ + if (!Fmtd) + tdbp = new(g) TDBCSV(this, txfp); + else + tdbp = new(g) TDBFMT(this, txfp); + + if (Multiple) + tdbp = new(g) TDBMUL(tdbp); + + return tdbp; + } // end of GetTable + +/* -------------------------- Class TDBCSV --------------------------- */ + +/***********************************************************************/ +/* Implementation of the TDBCSV class. 
*/ +/***********************************************************************/ +TDBCSV::TDBCSV(PCSVDEF tdp, PTXF txfp) : TDBDOS(tdp, txfp) + { +#if defined(_DEBUG) + assert (tdp); +#endif + Field = NULL; + Offset = NULL; + Fldlen = NULL; + Fields = 0; + Nerr = 0; + Quoted = tdp->Quoted; + Maxerr = tdp->Maxerr; + Accept = tdp->Accept; + Header = tdp->Header; + Sep = tdp->GetSep(); + Qot = tdp->GetQot(); + } // end of TDBCSV standard constructor + +TDBCSV::TDBCSV(PGLOBAL g, PTDBCSV tdbp) : TDBDOS(g, tdbp) + { + Fields = tdbp->Fields; + + if (Fields) { + if (tdbp->Offset) + Offset = (int*)PlugSubAlloc(g, NULL, sizeof(int) * Fields); + + if (tdbp->Fldlen) + Fldlen = (int*)PlugSubAlloc(g, NULL, sizeof(int) * Fields); + + Field = (PSZ *)PlugSubAlloc(g, NULL, sizeof(PSZ) * Fields); + + for (int i = 0; i < Fields; i++) { + if (Offset) + Offset[i] = tdbp->Offset[i]; + + if (Fldlen) + Fldlen[i] = tdbp->Fldlen[i]; + + if (Field) { + assert (Fldlen); + Field[i] = (PSZ)PlugSubAlloc(g, NULL, Fldlen[i] + 1); + Field[i][Fldlen[i]] = '\0'; + } // endif Field + + } // endfor i + + } else { + Field = NULL; + Offset = NULL; + Fldlen = NULL; + } // endif Fields + + Nerr = tdbp->Nerr; + Maxerr = tdbp->Maxerr; + Quoted = tdbp->Quoted; + Accept = tdbp->Accept; + Header = tdbp->Header; + Sep = tdbp->Sep; + Qot = tdbp->Qot; + } // end of TDBCSV copy constructor + +// Method +PTDB TDBCSV::CopyOne(PTABS t) + { + PTDB tp; + PCSVCOL cp1, cp2; + PGLOBAL g = t->G; // Is this really useful ??? + + tp = new(g) TDBCSV(g, this); + + for (cp1 = (PCSVCOL)Columns; cp1; cp1 = (PCSVCOL)cp1->GetNext()) { + cp2 = new(g) CSVCOL(cp1, tp); // Make a copy + NewPointer(t, cp1, cp2); + } // endfor cp1 + + return tp; + } // end of CopyOne + +/***********************************************************************/ +/* Allocate CSV column description block. 
*/ +/***********************************************************************/ +PCOL TDBCSV::MakeCol(PGLOBAL g, PCOLDEF cdp, PCOL cprec, int n) + { + return new(g) CSVCOL(g, cdp, this, cprec, n); + } // end of MakeCol + +/***********************************************************************/ +/* Check whether the number of errors is greater than the maximum. */ +/***********************************************************************/ +bool TDBCSV::CheckErr(void) + { + return (++Nerr) > Maxerr; + } // end of CheckErr + +/***********************************************************************/ +/* CSV EstimatedLength. Returns an estimated minimum line length. */ +/***********************************************************************/ +int TDBCSV::EstimatedLength(PGLOBAL g) + { + if (trace) + htrc("EstimatedLength: Fields=%d Columns=%p\n", Fields, Columns); + + if (!Fields) { + PCSVCOL colp; + + for (colp = (PCSVCOL)Columns; colp; colp = (PCSVCOL)colp->Next) + if (!colp->IsSpecial()) // Not a pseudo column + Fields = max(Fields, (int)colp->Fldnum); + + if (Columns) + Fields++; // Fldnum was 0 based + + } // endif Fields + + return (int)Fields; // Number of separators if all fields are null + } // end of Estimated Length + +#if 0 +/***********************************************************************/ +/* CSV tables favor the use temporary files for Update. */ +/***********************************************************************/ +bool TDBCSV::IsUsingTemp(PGLOBAL g) + { + USETEMP usetemp = PlgGetUser(g)->UseTemp; + + return (usetemp == TMP_YES || usetemp == TMP_FORCE || + (usetemp == TMP_AUTO && Mode == MODE_UPDATE)); + } // end of IsUsingTemp +#endif // 0 (Same as TDBDOS one) + +/***********************************************************************/ +/* CSV Access Method opening routine. */ +/* First allocate the Offset and Fldlen arrays according to the */ +/* greatest field used in that query. Then call the DOS opening fnc. 
*/ +/***********************************************************************/ +bool TDBCSV::OpenDB(PGLOBAL g) + { + bool rc = false; + PCOLDEF cdp; + PDOSDEF tdp = (PDOSDEF)To_Def; + + if (Use != USE_OPEN && (Columns || Mode == MODE_UPDATE)) { + // Allocate the storage used to read (or write) records + int i, len; + PCSVCOL colp; + + if (!Fields) // May have been set in TABFMT::OpenDB + if (Mode != MODE_UPDATE && Mode != MODE_INSERT) { + for (colp = (PCSVCOL)Columns; colp; colp = (PCSVCOL)colp->Next) + if (!colp->IsSpecial()) // Not a pseudo column + Fields = max(Fields, (int)colp->Fldnum); + + if (Columns) + Fields++; // Fldnum was 0 based + + } else + for (cdp = tdp->GetCols(); cdp; cdp = cdp->GetNext()) + Fields++; + + Offset = (int*)PlugSubAlloc(g, NULL, sizeof(int) * Fields); + Fldlen = (int*)PlugSubAlloc(g, NULL, sizeof(int) * Fields); + + if (Mode == MODE_INSERT || Mode == MODE_UPDATE) { + Field = (PSZ*)PlugSubAlloc(g, NULL, sizeof(PSZ) * Fields); + Fldtyp = (bool*)PlugSubAlloc(g, NULL, sizeof(bool) * Fields); + } // endif Mode + + for (i = 0; i < Fields; i++) { + Offset[i] = 0; + Fldlen[i] = 0; + + if (Field) { + Field[i] = NULL; + Fldtyp[i] = false; + } // endif Field + + } // endfor i + + if (Field) + // Prepare writing fields + if (Mode != MODE_UPDATE) + for (colp = (PCSVCOL)Columns; colp; colp = (PCSVCOL)colp->Next) { + i = colp->Fldnum; + len = colp->GetLength(); + Field[i] = (PSZ)PlugSubAlloc(g, NULL, len + 1); + Field[i][len] = '\0'; + Fldlen[i] = len; + Fldtyp[i] = IsTypeNum(colp->GetResultType()); + } // endfor colp + + else // MODE_UPDATE + for (cdp = tdp->GetCols(); cdp; cdp = cdp->GetNext()) { + i = cdp->GetOffset() - 1; + len = cdp->GetLength(); + Field[i] = (PSZ)PlugSubAlloc(g, NULL, len + 1); + Field[i][len] = '\0'; + Fldlen[i] = len; + Fldtyp[i] = IsTypeNum(cdp->GetType()); + } // endfor colp + + } // endif Use + + if (Header) { + // Check that the Lrecl is at least equal to the header line length + int headlen = 0; + PCOLDEF cdp; + PDOSDEF 
tdp = (PDOSDEF)To_Def; + + for (cdp = tdp->GetCols(); cdp; cdp = cdp->GetNext()) + headlen += strlen(cdp->GetName()) + 3; // 3 if names are quoted + + if (headlen > Lrecl) { + Lrecl = headlen; + Txfp->Lrecl = headlen; + } // endif headlen + + } // endif Header + + Nerr = 0; + rc = TDBDOS::OpenDB(g); + + if (!rc && Mode == MODE_UPDATE && To_Kindex) + // Because KINDEX::Init is executed in mode READ, we must restore + // the Fldlen array that was modified when reading the table file. + for (cdp = tdp->GetCols(); cdp; cdp = cdp->GetNext()) + Fldlen[cdp->GetOffset() - 1] = cdp->GetLength(); + + return rc; + } // end of OpenDB + +/***********************************************************************/ +/* SkipHeader: Physically skip first header line if applicable. */ +/* This is called from TDBDOS::OpenDB and must be executed before */ +/* Kindex construction if the file is accessed using an index. */ +/***********************************************************************/ +bool TDBCSV::SkipHeader(PGLOBAL g) + { + int len = GetFileLength(g); + bool rc = false; + +#if defined(_DEBUG) + if (len < 0) + return true; +#endif // _DEBUG + + if (Header) { + if (Mode == MODE_INSERT) { + if (!len) { + // New file, the header line must be constructed and written + int i, n = 0; + int hlen = 0; + bool q = Qot && Quoted > 0; + PCOLDEF cdp; + + // Estimate the length of the header list + for (cdp = To_Def->GetCols(); cdp; cdp = cdp->GetNext()) { + hlen += (1 + strlen(cdp->GetName())); + hlen += ((q) ? 
2 : 0); + n++; // Calculate the number of columns + } // endfor cdp + + if (hlen > Lrecl) { + sprintf(g->Message, MSG(LRECL_TOO_SMALL), hlen); + return true; + } // endif hlen + + // File is empty, write a header record + memset(To_Line, 0, Lrecl); + + // The column order in the file is given by the offset value + for (i = 1; i <= n; i++) + for (cdp = To_Def->GetCols(); cdp; cdp = cdp->GetNext()) + if (cdp->GetOffset() == i) { + if (q) + To_Line[strlen(To_Line)] = Qot; + + strcat(To_Line, cdp->GetName()); + + if (q) + To_Line[strlen(To_Line)] = Qot; + + if (i < n) + To_Line[strlen(To_Line)] = Sep; + + } // endif Offset + + rc = (Txfp->WriteBuffer(g) == RC_FX); + } // endif !FileLength + + } else if (Mode == MODE_DELETE) { + if (len) + rc = (Txfp->SkipRecord(g, true) == RC_FX); + + } else if (len) // !Insert && !Delete + rc = (Txfp->SkipRecord(g, false) == RC_FX || Txfp->RecordPos(g)); + + } // endif Header + + return rc; + } // end of SkipHeader + +/***********************************************************************/ +/* ReadBuffer: Physical read routine for the CSV access method. 
*/ +/***********************************************************************/ +int TDBCSV::ReadBuffer(PGLOBAL g) + { + char *p1, *p2, *p = NULL; + int i, n, len, rc = Txfp->ReadBuffer(g); + bool bad = false; + + if (trace > 1) + htrc("CSV: Row is '%s' rc=%d\n", To_Line, rc); + + if (rc != RC_OK || !Fields) + return rc; + else + p2 = To_Line; + + // Find the offsets and lengths of the columns for this row + for (i = 0; i < Fields; i++) { + if (!bad) { + if (Qot && *p2 == Qot) { // Quoted field + for (n = 0, p1 = ++p2; (p = strchr(p1, Qot)); p1 = p + 2) + if (*(p + 1) == Qot) + n++; // Doubled internal quotes + else + break; // Final quote + + if (p) { + len = p++ - p2; + +// if (Sep != ' ') +// for (; *p == ' '; p++) ; // Skip blanks + + if (*p != Sep && i != Fields - 1) { // Should be the separator + if (CheckErr()) { + sprintf(g->Message, MSG(MISSING_FIELD), + i+1, Name, RowNumber(g)); + return RC_FX; + } else if (Accept) + bad = true; + else + return RC_NF; + + } // endif p + + if (n) { + int j, k; + + // Suppress the double of internal quotes + for (j = k = 0; j < len; j++, k++) { + if (p2[j] == Qot) + j++; // skip first one + + p2[k] = p2[j]; + } // endfor i, j + + len -= n; + } // endif n + + } else if (CheckErr()) { + sprintf(g->Message, MSG(BAD_QUOTE_FIELD), + Name, i+1, RowNumber(g)); + return RC_FX; + } else if (Accept) { + len = strlen(p2); + bad = true; + } else + return RC_NF; + + } else if ((p = strchr(p2, Sep))) + len = p - p2; + else if (i == Fields - 1) + len = strlen(p2); + else if (Accept && Maxerr == 0) { + len = strlen(p2); + bad = true; + } else if (CheckErr()) { + sprintf(g->Message, MSG(MISSING_FIELD), i+1, Name, RowNumber(g)); + return RC_FX; + } else if (Accept) { + len = strlen(p2); + bad = true; + } else + return RC_NF; + + } else + len = 0; + + Offset[i] = p2 - To_Line; + + if (Mode != MODE_UPDATE) + Fldlen[i] = len; + else if (len > Fldlen[i]) { + sprintf(g->Message, MSG(FIELD_TOO_LONG), i+1, RowNumber(g)); + return RC_FX; + } else { + 
strncpy(Field[i], p2, len); + Field[i][len] = '\0'; + } // endif Mode + + if (p) + p2 = p + 1; + + } // endfor i + + return rc; + } // end of ReadBuffer + +/***********************************************************************/ +/* Data Base write routine CSV file access method. */ +/***********************************************************************/ +int TDBCSV::WriteDB(PGLOBAL g) + { + char sep[2], qot[2]; + int i, nlen, oldlen = strlen(To_Line); + + if (trace > 1) + htrc("CSV WriteDB: R%d Mode=%d key=%p link=%p\n", + Tdb_No, Mode, To_Key_Col, To_Link); + + // Before writing the line we must check its length + if ((nlen = CheckWrite(g)) < 0) + return RC_FX; + + // Before writing the line we must make it + sep[0] = Sep; + sep[1] = '\0'; + qot[0] = Qot; + qot[1] = '\0'; + *To_Line = '\0'; + + for (i = 0; i < Fields; i++) { + if (i) + strcat(To_Line, sep); + + if (Field[i]) + if (!strlen(Field[i])) { + // Generally null fields are not quoted + if (Quoted > 2) + // Except if explicitely required + strcat(strcat(To_Line, qot), qot); + + } else if (Qot && (strchr(Field[i], Sep) || *Field[i] == Qot + || Quoted > 1 || (Quoted == 1 && !Fldtyp[i]))) + if (strchr(Field[i], Qot)) { + // Field contains quotes that must be doubled + int j, k = strlen(To_Line), n = strlen(Field[i]); + + To_Line[k++] = Qot; + + for (j = 0; j < n; j++) { + if (Field[i][j] == Qot) + To_Line[k++] = Qot; + + To_Line[k++] = Field[i][j]; + } // endfor j + + To_Line[k++] = Qot; + To_Line[k] = '\0'; + } else + strcat(strcat(strcat(To_Line, qot), Field[i]), qot); + + else + strcat(To_Line, Field[i]); + + } // endfor i + +#if defined(_DEBUG) + assert ((unsigned)nlen == strlen(To_Line)); +#endif + + if (Mode == MODE_UPDATE && nlen < oldlen + && !((PDOSFAM)Txfp)->GetUseTemp()) { + // In Update mode with no temp file, line length must not change + To_Line[nlen] = Sep; + + for (nlen++; nlen < oldlen; nlen++) + To_Line[nlen] = ' '; + + To_Line[nlen] = '\0'; + } // endif + + if (trace > 1) + htrc("Write: 
line is=%s", To_Line); + + /*********************************************************************/ + /* Now start the writing process. */ + /*********************************************************************/ + return Txfp->WriteBuffer(g); + } // end of WriteDB + +/***********************************************************************/ +/* Check whether a new line fit in the file lrecl size. */ +/***********************************************************************/ +int TDBCSV::CheckWrite(PGLOBAL g) + { + int maxlen, n, nlen = (Fields - 1); + + if (trace > 1) + htrc("CheckWrite: R%d Mode=%d\n", Tdb_No, Mode); + + // Before writing the line we must check its length + maxlen = (Mode == MODE_UPDATE && !Txfp->GetUseTemp()) + ? strlen(To_Line) : Lrecl; + + // Check whether record is too int + for (int i = 0; i < Fields; i++) + if (Field[i]) { + if (!(n = strlen(Field[i]))) + n += (Quoted > 2 ? 2 : 0); + else if (strchr(Field[i], Sep) || (Qot && *Field[i] == Qot) + || Quoted > 1 || (Quoted == 1 && !Fldtyp[i])) + if (!Qot) { + sprintf(g->Message, MSG(SEP_IN_FIELD), i + 1); + return -1; + } else { + // Quotes inside a quoted field must be doubled + char *p1, *p2; + + for (p1 = Field[i]; (p2 = strchr(p1, Qot)); p1 = p2 + 1) + n++; + + n += 2; // Outside quotes + } // endif + + if ((nlen += n) > maxlen) { + strcpy(g->Message, MSG(LINE_TOO_LONG)); + return -1; + } // endif nlen + + } // endif Field + + return nlen; + } // end of CheckWrite + +/* ------------------------------------------------------------------- */ + +/***********************************************************************/ +/* Implementation of the TDBFMT class. 
*/ +/***********************************************************************/ +TDBFMT::TDBFMT(PGLOBAL g, PTDBFMT tdbp) : TDBCSV(g, tdbp) + { + FldFormat = tdbp->FldFormat; + To_Fld = tdbp->To_Fld; + FmtTest = tdbp->FmtTest; + Linenum = tdbp->Linenum; + } // end of TDBFMT copy constructor + +// Method +PTDB TDBFMT::CopyOne(PTABS t) + { + PTDB tp; + PCSVCOL cp1, cp2; +//PFMTCOL cp1, cp2; + PGLOBAL g = t->G; // Is this really useful ??? + + tp = new(g) TDBFMT(g, this); + + for (cp1 = (PCSVCOL)Columns; cp1; cp1 = (PCSVCOL)cp1->GetNext()) { +//for (cp1 = (PFMTCOL)Columns; cp1; cp1 = (PFMTCOL)cp1->GetNext()) { + cp2 = new(g) CSVCOL(cp1, tp); // Make a copy +// cp2 = new(g) FMTCOL(cp1, tp); // Make a copy + NewPointer(t, cp1, cp2); + } // endfor cp1 + + return tp; + } // end of CopyOne + +/***********************************************************************/ +/* Allocate FMT column description block. */ +/***********************************************************************/ +PCOL TDBFMT::MakeCol(PGLOBAL g, PCOLDEF cdp, PCOL cprec, int n) + { + return new(g) CSVCOL(g, cdp, this, cprec, n); +//return new(g) FMTCOL(cdp, this, cprec, n); + } // end of MakeCol + +/***********************************************************************/ +/* FMT EstimatedLength. Returns an estimated minimum line length. */ +/* The big problem here is how can we astimated that minimum ? */ +/***********************************************************************/ +int TDBFMT::EstimatedLength(PGLOBAL g) + { + // This is rather stupid !!! + return ((PDOSDEF)To_Def)->GetEnding() + (int)((Lrecl / 10) + 1); + } // end of EstimatedLength + +/***********************************************************************/ +/* FMT Access Method opening routine. 
*/ +/***********************************************************************/ +bool TDBFMT::OpenDB(PGLOBAL g) + { + Linenum = 0; + + if (Use != USE_OPEN && (Columns || Mode == MODE_UPDATE)) { + // Make the formats used to read records + PSZ pfm; + int i, n; + PCSVCOL colp; + PCOLDEF cdp; + PDOSDEF tdp = (PDOSDEF)To_Def; + +// if (Mode != MODE_UPDATE) { + for (colp = (PCSVCOL)Columns; colp; colp = (PCSVCOL)colp->Next) + if (!colp->IsSpecial()) // Not a pseudo column + Fields = max(Fields, (int)colp->Fldnum); + + if (Columns) + Fields++; // Fldnum was 0 based + +// } else +// for (cdp = tdp->GetCols(); cdp; cdp = cdp->GetNext()) +// Fields++; + + To_Fld = PlugSubAlloc(g, NULL, Lrecl + 1); + FldFormat = (PSZ*)PlugSubAlloc(g, NULL, sizeof(PSZ) * Fields); + memset(FldFormat, 0, sizeof(PSZ) * Fields); + FmtTest = (int*)PlugSubAlloc(g, NULL, sizeof(int) * Fields); + memset(FmtTest, 0, sizeof(int) * Fields); + + // Get the column formats + for (cdp = tdp->GetCols(); cdp; cdp = cdp->GetNext()) + if ((i = cdp->GetOffset() - 1) < Fields) { + if (!(pfm = cdp->GetFmt())) { + sprintf(g->Message, MSG(NO_FLD_FORMAT), i + 1, Name); + return true; + } // endif pfm + + // Roughly check the Fmt format + if ((n = strlen(pfm) - 2) < 4) { + sprintf(g->Message, MSG(BAD_FLD_FORMAT), i + 1, Name); + return true; + } // endif n + + FldFormat[i] = (PSZ)PlugSubAlloc(g, NULL, n + 5); + strcpy(FldFormat[i], pfm); + + if (!strcmp(pfm + n, "%m")) { + // This is a field that can be missing. Flag it so it can + // be handled with special processing. + FldFormat[i][n+1] = 'n'; // To have sscanf normal processing + FmtTest[i] = 2; + } else if (i+1 < Fields && strcmp(pfm + n, "%n")) { + // There are trailing characters after the field contents + // add a marker for the next field start position. 
+ strcat(FldFormat[i], "%n"); + FmtTest[i] = 1; + } // endif's + + } // endif i + + } // endif Use + + return TDBCSV::OpenDB(g); + } // end of OpenDB + +/***********************************************************************/ +/* ReadBuffer: Physical read routine for the FMT access method. */ +/***********************************************************************/ +int TDBFMT::ReadBuffer(PGLOBAL g) + { + int i, len, n, deb, fin, nwp, pos = 0, rc; + bool bad = false; + + if ((rc = Txfp->ReadBuffer(g)) != RC_OK || !Fields) + return rc; + else + ++Linenum; + + if (trace > 1) + htrc("FMT: Row %d is '%s' rc=%d\n", Linenum, To_Line, rc); + + // Find the offsets and lengths of the columns for this row + for (i = 0; i < Fields; i++) { + if (!bad) { + deb = fin = -1; + + if (!FldFormat[i]) { + n = 0; + } else if (FmtTest[i] == 1) { + nwp = -1; + n = sscanf(To_Line + pos, FldFormat[i], &deb, To_Fld, &fin, &nwp); + } else { + n = sscanf(To_Line + pos, FldFormat[i], &deb, To_Fld, &fin); + + if (n != 1 && (deb >= 0 || i == Fields - 1) && FmtTest[i] == 2) { + // Missing optional field, not an error + n = 1; + + if (i == Fields - 1) + fin = deb = 0; + else + fin = deb; + + } // endif n + + nwp = fin; + } // endif i + + if (n != 1 || deb < 0 || fin < 0 || nwp < 0) { + // This is to avoid a very strange sscanf bug occuring + // with fields that ends with a null character. + // This bug causes subsequent sscanf to return in error, + // so next lines are not parsed correctly. + sscanf("a", "%*c"); // Seems to reset things Ok + + if (CheckErr()) { + sprintf(g->Message, MSG(BAD_LINEFLD_FMT), Linenum, i + 1, Name); + return RC_FX; + } else if (Accept) + bad = true; + else + return RC_NF; + + } // endif n... 
+ + } // endif !bad + + if (!bad) { + Offset[i] = pos + deb; + len = fin - deb; + } else { + nwp = 0; + Offset[i] = pos; + len = 0; + } // endif bad + +// if (Mode != MODE_UPDATE) + Fldlen[i] = len; +// else if (len > Fldlen[i]) { +// sprintf(g->Message, MSG(FIELD_TOO_LONG), i+1, To_Tdb->RowNumber(g)); +// return RC_FX; +// } else { +// strncpy(Field[i], To_Line + pos, len); +// Field[i][len] = '\0'; +// } // endif Mode + + pos += nwp; + } // endfor i + + if (bad) + Nerr++; + else + sscanf("a", "%*c"); // Seems to reset things Ok + + return rc; + } // end of ReadBuffer + +/***********************************************************************/ +/* Data Base write routine FMT file access method. */ +/***********************************************************************/ +int TDBFMT::WriteDB(PGLOBAL g) + { + sprintf(g->Message, MSG(FMT_WRITE_NIY), "FMT"); + return RC_FX; // NIY + } // end of WriteDB + +// ------------------------ CSVCOL functions ---------------------------- + +/***********************************************************************/ +/* CSVCOL public constructor */ +/***********************************************************************/ +CSVCOL::CSVCOL(PGLOBAL g, PCOLDEF cdp, PTDB tdbp, PCOL cprec, int i) + : DOSCOL(g, cdp, tdbp, cprec, i, "CSV") + { + Fldnum = Deplac - 1; + Deplac = 0; + } // end of CSVCOL constructor + +/***********************************************************************/ +/* CSVCOL constructor used for copying columns. */ +/* tdbp is the pointer to the new table descriptor. 
*/ +/***********************************************************************/ +CSVCOL::CSVCOL(CSVCOL *col1, PTDB tdbp) : DOSCOL(col1, tdbp) + { + Fldnum = col1->Fldnum; + } // end of CSVCOL copy constructor + +/***********************************************************************/ +/* VarSize: This function tells UpdateDB whether or not the block */ +/* optimization file must be redone if this column is updated, even */ +/* it is not sorted or clustered. This applies to a blocked table, */ +/* because if it is updated using a temporary file, the block size */ +/* may be modified. */ +/***********************************************************************/ +bool CSVCOL::VarSize(void) + { + PTXF txfp = ((PTDBCSV)To_Tdb)->Txfp; + + if (txfp->IsBlocked() && txfp->GetUseTemp()) + // Blocked table using a temporary file + return true; + else + return false; + + } // end VarSize + +/***********************************************************************/ +/* ReadColumn: call DOSCOL::ReadColumn after having set the offet */ +/* and length of the field to read as calculated by TDBCSV::ReadDB. */ +/***********************************************************************/ +void CSVCOL::ReadColumn(PGLOBAL g) + { + int rc; + PTDBCSV tdbp = (PTDBCSV)To_Tdb; + + /*********************************************************************/ + /* If physical reading of the line was deferred, do it now. 
*/ + /*********************************************************************/ + if (!tdbp->IsRead()) + if ((rc = tdbp->ReadBuffer(g)) != RC_OK) { + if (rc == RC_EF) + sprintf(g->Message, MSG(INV_DEF_READ), rc); + + longjmp(g->jumper[g->jump_level], 34); + } // endif + + if (tdbp->Mode != MODE_UPDATE) { + int colen = Long; // Column length + + // Set the field offset and length for this row + Deplac = tdbp->Offset[Fldnum]; // Field offset + Long = tdbp->Fldlen[Fldnum]; // Field length + + if (trace > 1) + htrc("CSV ReadColumn %s Fldnum=%d offset=%d fldlen=%d\n", + Name, Fldnum, Deplac, Long); + + if (Long > colen && tdbp->CheckErr()) { + Long = colen; // Restore column length + sprintf(g->Message, MSG(FLD_TOO_LNG_FOR), + Fldnum + 1, Name, To_Tdb->RowNumber(g), tdbp->GetFile(g)); + longjmp(g->jumper[g->jump_level], 34); + } // endif Long + + // Now do the reading + DOSCOL::ReadColumn(g); + + // Restore column length + Long = colen; + } else { // Mode Update + // Field have been copied in TDB Field array + PSZ fp = tdbp->Field[Fldnum]; + + Value->SetValue_psz(fp); + } // endif Mode + + } // end of ReadColumn + +/***********************************************************************/ +/* WriteColumn: The column is written in TDBCSV matching Field. */ +/***********************************************************************/ +void CSVCOL::WriteColumn(PGLOBAL g) + { + char *p, buf[32]; + int flen; + PTDBCSV tdbp = (PTDBCSV)To_Tdb; + + if (trace > 1) + htrc("CSV WriteColumn: col %s R%d coluse=%.4X status=%.4X\n", + Name, tdbp->GetTdb_No(), ColUse, Status); + + flen = GetLength(); + + if (trace > 1) + htrc("Lrecl=%d Long=%d field=%d coltype=%d colval=%p\n", + tdbp->Lrecl, Long, flen, Buf_Type, Value); + + /*********************************************************************/ + /* Check whether the new value has to be converted to Buf_Type. 
*/ + /*********************************************************************/ + if (Value != To_Val) + Value->SetValue_pval(To_Val, false); // Convert the updated value + + /*********************************************************************/ + /* Get the string representation of the column value. */ + /*********************************************************************/ + p = Value->ShowValue(buf); + + if (trace > 1) + htrc("new length(%p)=%d\n", p, strlen(p)); + + if ((signed)strlen(p) > flen) { + sprintf(g->Message, MSG(BAD_FLD_LENGTH), Name, p, flen); + longjmp(g->jumper[g->jump_level], 34); + } // endif + + if (trace > 1) + htrc("buffer=%s\n", p); + + /*********************************************************************/ + /* Updating must be done also during the first pass so writing the */ + /* updated record can be checked for acceptable record length. */ + /*********************************************************************/ + if (Fldnum < 0) { + // This can happen for wrong offset value in XDB files + sprintf(g->Message, MSG(BAD_FIELD_RANK), Fldnum + 1, Name); + longjmp(g->jumper[g->jump_level], 34); + } else + strncpy(tdbp->Field[Fldnum], p, flen); + + if (trace > 1) + htrc(" col written: '%s'\n", p); + + } // end of WriteColumn + +/* ------------------------ End of TabFmt ---------------------------- */ |