From 30c4b0ebc24fe0106e146b1f6577a4150e71e258 Mon Sep 17 00:00:00 2001 From: Alexander Barkov Date: Thu, 7 Feb 2013 13:34:27 +0400 Subject: - Fixing TAB to 2 spaces - Fixing line endings from "\r\n" to "\n" --- storage/connect/tabfmt.cpp | 2782 ++++++++++++++++++++++---------------------- 1 file changed, 1391 insertions(+), 1391 deletions(-) (limited to 'storage/connect/tabfmt.cpp') diff --git a/storage/connect/tabfmt.cpp b/storage/connect/tabfmt.cpp index f2efa775d5e..c62337b26d1 100644 --- a/storage/connect/tabfmt.cpp +++ b/storage/connect/tabfmt.cpp @@ -1,1391 +1,1391 @@ -/************* TabFmt C++ Program Source Code File (.CPP) **************/ -/* PROGRAM NAME: TABFMT */ -/* ------------- */ -/* Version 3.7 */ -/* */ -/* COPYRIGHT: */ -/* ---------- */ -/* (C) Copyright to the author Olivier BERTRAND 2001 - 2013 */ -/* */ -/* WHAT THIS PROGRAM DOES: */ -/* ----------------------- */ -/* This program are the TABFMT classes DB execution routines. */ -/* The base class CSV is comma separated files. */ -/* FMT (Formatted) files are those having a complex internal record */ -/* format described in the Format keyword of their definition. */ -/***********************************************************************/ - -/***********************************************************************/ -/* Include relevant MariaDB header file. */ -/***********************************************************************/ -#include "my_global.h" - -#if defined(WIN32) -#include -#include -#include -#include -#if defined(__BORLANDC__) -#define __MFC_COMPAT__ // To define min/max as macro -#endif -//#include -#include "osutil.h" -#else -#if defined(UNIX) -#include -#include -#include "osutil.h" -#else -#include -#endif -#include -#endif - -/***********************************************************************/ -/* Include application header files: */ -/* global.h is header containing all global declarations. */ -/* plgdbsem.h is header containing the DB application declarations. */ -/* tabdos.h is header containing the TABDOS class declarations. */ -/***********************************************************************/ -#include "global.h" -#include "plgdbsem.h" -#include "filamap.h" -#if defined(ZIP_SUPPORT) -#include "filamzip.h" -#endif // ZIP_SUPPORT -#include "tabfmt.h" -#include "tabmul.h" -#define NO_FUNC -#include "plgcnx.h" // For DB types -#include "resource.h" - -/***********************************************************************/ -/* This should be an option. */ -/***********************************************************************/ -#define MAXCOL 200 /* Default max column nb in result */ -#define TYPE_UNKNOWN 10 /* Must be greater than other types */ - -extern "C" int trace; - -/***********************************************************************/ -/* CSV Catalog utility functions. */ -/***********************************************************************/ -PQRYRES PlgAllocResult(PGLOBAL, int, int, int, int *, int *, - unsigned int *, bool blank = true, bool nonull = false); - -/***********************************************************************/ -/* CSVColumns: constructs the result blocks containing the description */ -/* of all the columns of a CSV file that will be retrieved by #GetData.*/ -/* Note: the algorithm to set the type is based on the internal values */ -/* of types (TYPE_STRING < TYPE_FLOAT < TYPE_INT) (1 < 2 < 7). */ -/* If these values are changed, this will have to be revisited. */ -/***********************************************************************/ -PQRYRES CSVColumns(PGLOBAL g, char *fn, char sep, char q, int hdr, int mxr) - { - static int dbtype[] = {DB_CHAR, DB_SHORT, DB_CHAR, - DB_INT, DB_INT, DB_SHORT}; - static int buftyp[] = {TYPE_STRING, TYPE_SHORT, TYPE_STRING, - TYPE_INT, TYPE_INT, TYPE_SHORT}; - static unsigned int length[] = {6, 6, 8, 10, 10, 6}; - char *p, *colname[MAXCOL], dechar, filename[_MAX_PATH], buf[4096]; - int i, imax, hmax, n, nerr, phase, blank, digit, dec, type; - int ncol = sizeof(dbtype) / sizeof(int); - int num_read = 0, num_max = 10000000; // Statistics - int len[MAXCOL], typ[MAXCOL], prc[MAXCOL]; - FILE *infile; - PQRYRES qrp; - PCOLRES crp; - -// num_max = atoi(p+1); // Max num of record to test -#if defined(WIN32) - if (strnicmp(setlocale(LC_NUMERIC, NULL), "French", 6)) - dechar = '.'; - else - dechar = ','; -#else // !WIN32 - dechar = '.'; -#endif // !WIN32 - - if (trace) - htrc("File %s sep=%c q=%c hdr=%d mxr=%d\n", - SVP(fn), sep, q, hdr, mxr); - - if (!fn) { - strcpy(g->Message, MSG(MISSING_FNAME)); - return NULL; - } // endif fn - - imax = hmax = nerr = 0; - mxr = max(0, mxr); - - for (i = 0; i < MAXCOL; i++) { - colname[i] = NULL; - len[i] = 0; - typ[i] = TYPE_UNKNOWN; - prc[i] = 0; - } // endfor i - - /*********************************************************************/ - /* Open the input file. */ - /*********************************************************************/ - PlugSetPath(filename, fn, PlgGetDataPath(g)); - - if (!(infile= global_fopen(g, MSGID_CANNOT_OPEN, filename, "r"))) - return NULL; - - if (hdr) { - /*******************************************************************/ - /* Make the column names from the first line. */ - /*******************************************************************/ - phase = 0; - - if (fgets(buf, sizeof(buf), infile)) { - n = strlen(buf) + 1; - buf[n - 2] = '\0'; -#if defined(UNIX) - // The file can be imported from Windows - if (buf[n - 3] == '\r') - buf[n - 3] = 0; -#endif // UNIX - p = (char*)PlugSubAlloc(g, NULL, n); - memcpy(p, buf, n); - - //skip leading blanks - for (; *p == ' '; p++) ; - - if (q && *p == q) { - // Header is quoted - p++; - phase = 1; - } // endif q - - colname[0] = p; - } else { - sprintf(g->Message, MSG(FILE_IS_EMPTY), fn); - goto err; - } // endif's - - for (i = 1; *p; p++) - if (phase == 1 && *p == q) { - *p = '\0'; - phase = 0; - } else if (*p == sep && !phase) { - *p = '\0'; - - //skip leading blanks - for (; *(p+1) == ' '; p++) ; - - if (q && *(p+1) == q) { - // Header is quoted - p++; - phase = 1; - } // endif q - - colname[i++] = p + 1; - } // endif sep - - num_read++; - imax = hmax = i; - - for (i = 0; i < hmax; i++) - length[0] = max(length[0], strlen(colname[i])); - - } // endif hdr - - for (num_read++; num_read <= num_max; num_read++) { - /*******************************************************************/ - /* Now start the reading process. Read one line. */ - /*******************************************************************/ - if (fgets(buf, sizeof(buf), infile)) { - n = strlen(buf); - buf[n - 1] = '\0'; -#if defined(UNIX) - // The file can be imported from Windows - if (buf[n - 2] == '\r') - buf[n - 2] = 0; -#endif // UNIX - } else if (feof(infile)) { - sprintf(g->Message, MSG(EOF_AFTER_LINE), num_read -1); - break; - } else { - sprintf(g->Message, MSG(ERR_READING_REC), num_read, fn); - goto err; - } // endif's - - /*******************************************************************/ - /* Make the test for field lengths. */ - /*******************************************************************/ - i = n = phase = blank = digit = dec = 0; - - for (p = buf; *p; p++) - if (*p == sep) { - if (phase != 1) { - if (i == MAXCOL - 1) { - sprintf(g->Message, MSG(TOO_MANY_FIELDS), num_read, fn); - goto err; - } // endif i - - if (n) { - len[i] = max(len[i], n); - type = (digit || (dec && n == 1)) ? TYPE_STRING - : (dec) ? TYPE_FLOAT : TYPE_INT; - typ[i] = min(type, typ[i]); - prc[i] = max((typ[i] == TYPE_FLOAT) ? (dec - 1) : 0, prc[i]); - } // endif n - - i++; - n = phase = blank = digit = dec = 0; - } else // phase == 1 - n++; - - } else if (*p == ' ') { - if (phase < 2) - n++; - - if (blank) - digit = 1; - - } else if (*p == q) { - if (phase == 0) { - if (blank) - if (++nerr > mxr) { - sprintf(g->Message, MSG(MISPLACED_QUOTE), num_read); - goto err; - } else - goto skip; - - n = 0; - phase = digit = 1; - } else if (phase == 1) { - if (*(p+1) == q) { - // This is currently not implemented for CSV tables -// if (++nerr > mxr) { -// sprintf(g->Message, MSG(QUOTE_IN_QUOTE), num_read); -// goto err; -// } else -// goto skip; - - p++; - n++; - } else - phase = 2; - - } else if (++nerr > mxr) { // phase == 2 - sprintf(g->Message, MSG(MISPLACED_QUOTE), num_read); - goto err; - } else - goto skip; - - } else { - if (phase == 2) - if (++nerr > mxr) { - sprintf(g->Message, MSG(MISPLACED_QUOTE), num_read); - goto err; - } else - goto skip; - - // isdigit cannot be used here because of debug assert - if (!strchr("0123456789", *p)) { - if (!digit && *p == dechar) - dec = 1; // Decimal point found - else if (blank || !(*p == '-' || *p == '+')) - digit = 1; - - } else if (dec) - dec++; // More decimals - - n++; - blank = 1; - } // endif's *p - - if (phase == 1) - if (++nerr > mxr) { - sprintf(g->Message, MSG(UNBALANCE_QUOTE), num_read); - goto err; - } else - goto skip; - - if (n) { - len[i] = max(len[i], n); - type = (digit || n == 0 || (dec && n == 1)) ? TYPE_STRING - : (dec) ? TYPE_FLOAT : TYPE_INT; - typ[i] = min(type, typ[i]); - prc[i] = max((typ[i] == TYPE_FLOAT) ? (dec - 1) : 0, prc[i]); - } // endif n - - imax = max(imax, i+1); - skip: ; // Skip erroneous line - } // endfor num_read - - if (trace) { - htrc("imax=%d Lengths:", imax); - - for (i = 0; i < imax; i++) - htrc(" %d", len[i]); - - htrc("\n"); - } // endif trace - - fclose(infile); - - if (trace) - htrc("CSVColumns: imax=%d hmax=%d len=%d\n", - imax, hmax, length[0]); - - /*********************************************************************/ - /* Allocate the structures used to refer to the result set. */ - /*********************************************************************/ - qrp = PlgAllocResult(g, ncol, imax, IDS_COLUMNS + 3, - dbtype, buftyp, length); - qrp->Nblin = imax; - - /*********************************************************************/ - /* Now get the results into blocks. */ - /*********************************************************************/ - for (i = 0; i < imax; i++) { - if (i >= hmax) { - sprintf(buf, "COL%.3d", i+1); - p = buf; - } else - p = colname[i]; - - if (typ[i] == TYPE_UNKNOWN) // Void column - typ[i] = TYPE_STRING; - - crp = qrp->Colresp; // Column Name - crp->Kdata->SetValue(p, i); - crp = crp->Next; // Data Type - crp->Kdata->SetValue(typ[i], i); - crp = crp->Next; // Type Name - crp->Kdata->SetValue(GetTypeName(typ[i]), i); - crp = crp->Next; // Precision - crp->Kdata->SetValue(len[i], i); - crp = crp->Next; // Length - crp->Kdata->SetValue(len[i], i); - crp = crp->Next; // Scale (precision) - crp->Kdata->SetValue(prc[i], i); - } // endfor i - - /*********************************************************************/ - /* Return the result pointer for use by GetData routines. */ - /*********************************************************************/ - return qrp; - - err: - fclose(infile); - return NULL; - } // end of CSVCColumns - -/* --------------------------- Class CSVDEF -------------------------- */ - -/***********************************************************************/ -/* CSVDEF constructor. */ -/***********************************************************************/ -CSVDEF::CSVDEF(void) - { - Fmtd = Accept = Header = false; - Maxerr = 0; - Quoted = -1; - Sep = ','; - Qot = '\0'; - } // end of CSVDEF constructor - -/***********************************************************************/ -/* DefineAM: define specific AM block values from XDB file. */ -/***********************************************************************/ -bool CSVDEF::DefineAM(PGLOBAL g, LPCSTR am, int poff) - { - char buf[8]; - - // Double check correctness of offset values - for (PCOLDEF cdp = To_Cols; cdp; cdp = cdp->GetNext()) - if (cdp->GetOffset() < 1) { - strcpy(g->Message, MSG(BAD_OFFSET_VAL)); - return true; - } // endif Offset - - // Call DOSDEF DefineAM with am=CSV so FMT is not confused with FIX - if (DOSDEF::DefineAM(g, "CSV", poff)) - return true; - - Cat->GetCharCatInfo(Name, "Separator", ",", buf, sizeof(buf)); - Sep = (strlen(buf) == 2 && buf[0] == '\\' && buf[1] == 't') ? '\t' : *buf; - Quoted = Cat->GetIntCatInfo(Name, "Quoted", -1); - Cat->GetCharCatInfo(Name, "Qchar", "", buf, sizeof(buf)); - Qot = *buf; - - if (Qot && Quoted < 0) - Quoted = 0; - else if (!Qot && Quoted >= 0) - Qot = '"'; - - Fmtd = (!Sep || (am && (*am == 'F' || *am == 'f'))); - Header = (Cat->GetIntCatInfo(Name, "Header", 0) != 0); - Maxerr = Cat->GetIntCatInfo(Name, "Maxerr", 0); - Accept = (Cat->GetIntCatInfo(Name, "Accept", 0) != 0); - return false; - } // end of DefineAM - -/***********************************************************************/ -/* GetTable: makes a new Table Description Block. */ -/***********************************************************************/ -PTDB CSVDEF::GetTable(PGLOBAL g, MODE mode) - { - USETEMP tmp = PlgGetUser(g)->UseTemp; - bool map = Mapped && mode != MODE_INSERT && - !(tmp != TMP_NO && mode == MODE_UPDATE) && - !(tmp == TMP_FORCE && - (mode == MODE_UPDATE || mode == MODE_DELETE)); - PTXF txfp; - PTDBASE tdbp; - - /*********************************************************************/ - /* Allocate a file processing class of the proper type. */ - /*********************************************************************/ - if (map) { - // Should be now compatible with UNIX - txfp = new(g) MAPFAM(this); - } else if (Compressed) { -#if defined(ZIP_SUPPORT) - if (Compressed == 1) - txfp = new(g) ZIPFAM(this); - else { - strcpy(g->Message, "Compress 2 not supported yet"); -// txfp = new(g) ZLBFAM(defp); - return NULL; - } // endelse -#else // !ZIP_SUPPORT - strcpy(g->Message, "Compress not supported"); - return NULL; -#endif // !ZIP_SUPPORT - } else - txfp = new(g) DOSFAM(this); - - /*********************************************************************/ - /* Allocate a TDB of the proper type. */ - /* Column blocks will be allocated only when needed. */ - /*********************************************************************/ - if (!Fmtd) - tdbp = new(g) TDBCSV(this, txfp); - else - tdbp = new(g) TDBFMT(this, txfp); - - if (Multiple) - tdbp = new(g) TDBMUL(tdbp); - - return tdbp; - } // end of GetTable - -/* -------------------------- Class TDBCSV --------------------------- */ - -/***********************************************************************/ -/* Implementation of the TDBCSV class. */ -/***********************************************************************/ -TDBCSV::TDBCSV(PCSVDEF tdp, PTXF txfp) : TDBDOS(tdp, txfp) - { -#if defined(_DEBUG) - assert (tdp); -#endif - Field = NULL; - Offset = NULL; - Fldlen = NULL; - Fields = 0; - Nerr = 0; - Quoted = tdp->Quoted; - Maxerr = tdp->Maxerr; - Accept = tdp->Accept; - Header = tdp->Header; - Sep = tdp->GetSep(); - Qot = tdp->GetQot(); - } // end of TDBCSV standard constructor - -TDBCSV::TDBCSV(PGLOBAL g, PTDBCSV tdbp) : TDBDOS(g, tdbp) - { - Fields = tdbp->Fields; - - if (Fields) { - if (tdbp->Offset) - Offset = (int*)PlugSubAlloc(g, NULL, sizeof(int) * Fields); - - if (tdbp->Fldlen) - Fldlen = (int*)PlugSubAlloc(g, NULL, sizeof(int) * Fields); - - Field = (PSZ *)PlugSubAlloc(g, NULL, sizeof(PSZ) * Fields); - - for (int i = 0; i < Fields; i++) { - if (Offset) - Offset[i] = tdbp->Offset[i]; - - if (Fldlen) - Fldlen[i] = tdbp->Fldlen[i]; - - if (Field) { - assert (Fldlen); - Field[i] = (PSZ)PlugSubAlloc(g, NULL, Fldlen[i] + 1); - Field[i][Fldlen[i]] = '\0'; - } // endif Field - - } // endfor i - - } else { - Field = NULL; - Offset = NULL; - Fldlen = NULL; - } // endif Fields - - Nerr = tdbp->Nerr; - Maxerr = tdbp->Maxerr; - Quoted = tdbp->Quoted; - Accept = tdbp->Accept; - Header = tdbp->Header; - Sep = tdbp->Sep; - Qot = tdbp->Qot; - } // end of TDBCSV copy constructor - -// Method -PTDB TDBCSV::CopyOne(PTABS t) - { - PTDB tp; - PCSVCOL cp1, cp2; - PGLOBAL g = t->G; // Is this really useful ??? - - tp = new(g) TDBCSV(g, this); - - for (cp1 = (PCSVCOL)Columns; cp1; cp1 = (PCSVCOL)cp1->GetNext()) { - cp2 = new(g) CSVCOL(cp1, tp); // Make a copy - NewPointer(t, cp1, cp2); - } // endfor cp1 - - return tp; - } // end of CopyOne - -/***********************************************************************/ -/* Allocate CSV column description block. */ -/***********************************************************************/ -PCOL TDBCSV::MakeCol(PGLOBAL g, PCOLDEF cdp, PCOL cprec, int n) - { - return new(g) CSVCOL(g, cdp, this, cprec, n); - } // end of MakeCol - -/***********************************************************************/ -/* Check whether the number of errors is greater than the maximum. */ -/***********************************************************************/ -bool TDBCSV::CheckErr(void) - { - return (++Nerr) > Maxerr; - } // end of CheckErr - -/***********************************************************************/ -/* CSV EstimatedLength. Returns an estimated minimum line length. */ -/***********************************************************************/ -int TDBCSV::EstimatedLength(PGLOBAL g) - { - if (trace) - htrc("EstimatedLength: Fields=%d Columns=%p\n", Fields, Columns); - - if (!Fields) { - PCSVCOL colp; - - for (colp = (PCSVCOL)Columns; colp; colp = (PCSVCOL)colp->Next) - if (!colp->IsSpecial()) // Not a pseudo column - Fields = max(Fields, (int)colp->Fldnum); - - if (Columns) - Fields++; // Fldnum was 0 based - - } // endif Fields - - return (int)Fields; // Number of separators if all fields are null - } // end of Estimated Length - -#if 0 -/***********************************************************************/ -/* CSV tables favor the use temporary files for Update. */ -/***********************************************************************/ -bool TDBCSV::IsUsingTemp(PGLOBAL g) - { - USETEMP usetemp = PlgGetUser(g)->UseTemp; - - return (usetemp == TMP_YES || usetemp == TMP_FORCE || - (usetemp == TMP_AUTO && Mode == MODE_UPDATE)); - } // end of IsUsingTemp -#endif // 0 (Same as TDBDOS one) - -/***********************************************************************/ -/* CSV Access Method opening routine. */ -/* First allocate the Offset and Fldlen arrays according to the */ -/* greatest field used in that query. Then call the DOS opening fnc. */ -/***********************************************************************/ -bool TDBCSV::OpenDB(PGLOBAL g) - { - bool rc = false; - PCOLDEF cdp; - PDOSDEF tdp = (PDOSDEF)To_Def; - - if (Use != USE_OPEN && (Columns || Mode == MODE_UPDATE)) { - // Allocate the storage used to read (or write) records - int i, len; - PCSVCOL colp; - - if (!Fields) // May have been set in TABFMT::OpenDB - if (Mode != MODE_UPDATE && Mode != MODE_INSERT) { - for (colp = (PCSVCOL)Columns; colp; colp = (PCSVCOL)colp->Next) - if (!colp->IsSpecial()) // Not a pseudo column - Fields = max(Fields, (int)colp->Fldnum); - - if (Columns) - Fields++; // Fldnum was 0 based - - } else - for (cdp = tdp->GetCols(); cdp; cdp = cdp->GetNext()) - Fields++; - - Offset = (int*)PlugSubAlloc(g, NULL, sizeof(int) * Fields); - Fldlen = (int*)PlugSubAlloc(g, NULL, sizeof(int) * Fields); - - if (Mode == MODE_INSERT || Mode == MODE_UPDATE) { - Field = (PSZ*)PlugSubAlloc(g, NULL, sizeof(PSZ) * Fields); - Fldtyp = (bool*)PlugSubAlloc(g, NULL, sizeof(bool) * Fields); - } // endif Mode - - for (i = 0; i < Fields; i++) { - Offset[i] = 0; - Fldlen[i] = 0; - - if (Field) { - Field[i] = NULL; - Fldtyp[i] = false; - } // endif Field - - } // endfor i - - if (Field) - // Prepare writing fields - if (Mode != MODE_UPDATE) - for (colp = (PCSVCOL)Columns; colp; colp = (PCSVCOL)colp->Next) { - i = colp->Fldnum; - len = colp->GetLength(); - Field[i] = (PSZ)PlugSubAlloc(g, NULL, len + 1); - Field[i][len] = '\0'; - Fldlen[i] = len; - Fldtyp[i] = IsTypeNum(colp->GetResultType()); - } // endfor colp - - else // MODE_UPDATE - for (cdp = tdp->GetCols(); cdp; cdp = cdp->GetNext()) { - i = cdp->GetOffset() - 1; - len = cdp->GetLength(); - Field[i] = (PSZ)PlugSubAlloc(g, NULL, len + 1); - Field[i][len] = '\0'; - Fldlen[i] = len; - Fldtyp[i] = IsTypeNum(cdp->GetType()); - } // endfor colp - - } // endif Use - - if (Header) { - // Check that the Lrecl is at least equal to the header line length - int headlen = 0; - PCOLDEF cdp; - PDOSDEF tdp = (PDOSDEF)To_Def; - - for (cdp = tdp->GetCols(); cdp; cdp = cdp->GetNext()) - headlen += strlen(cdp->GetName()) + 3; // 3 if names are quoted - - if (headlen > Lrecl) { - Lrecl = headlen; - Txfp->Lrecl = headlen; - } // endif headlen - - } // endif Header - - Nerr = 0; - rc = TDBDOS::OpenDB(g); - - if (!rc && Mode == MODE_UPDATE && To_Kindex) - // Because KINDEX::Init is executed in mode READ, we must restore - // the Fldlen array that was modified when reading the table file. - for (cdp = tdp->GetCols(); cdp; cdp = cdp->GetNext()) - Fldlen[cdp->GetOffset() - 1] = cdp->GetLength(); - - return rc; - } // end of OpenDB - -/***********************************************************************/ -/* SkipHeader: Physically skip first header line if applicable. */ -/* This is called from TDBDOS::OpenDB and must be executed before */ -/* Kindex construction if the file is accessed using an index. */ -/***********************************************************************/ -bool TDBCSV::SkipHeader(PGLOBAL g) - { - int len = GetFileLength(g); - bool rc = false; - -#if defined(_DEBUG) - if (len < 0) - return true; -#endif // _DEBUG - - if (Header) { - if (Mode == MODE_INSERT) { - if (!len) { - // New file, the header line must be constructed and written - int i, n = 0; - int hlen = 0; - bool q = Qot && Quoted > 0; - PCOLDEF cdp; - - // Estimate the length of the header list - for (cdp = To_Def->GetCols(); cdp; cdp = cdp->GetNext()) { - hlen += (1 + strlen(cdp->GetName())); - hlen += ((q) ? 2 : 0); - n++; // Calculate the number of columns - } // endfor cdp - - if (hlen > Lrecl) { - sprintf(g->Message, MSG(LRECL_TOO_SMALL), hlen); - return true; - } // endif hlen - - // File is empty, write a header record - memset(To_Line, 0, Lrecl); - - // The column order in the file is given by the offset value - for (i = 1; i <= n; i++) - for (cdp = To_Def->GetCols(); cdp; cdp = cdp->GetNext()) - if (cdp->GetOffset() == i) { - if (q) - To_Line[strlen(To_Line)] = Qot; - - strcat(To_Line, cdp->GetName()); - - if (q) - To_Line[strlen(To_Line)] = Qot; - - if (i < n) - To_Line[strlen(To_Line)] = Sep; - - } // endif Offset - - rc = (Txfp->WriteBuffer(g) == RC_FX); - } // endif !FileLength - - } else if (Mode == MODE_DELETE) { - if (len) - rc = (Txfp->SkipRecord(g, true) == RC_FX); - - } else if (len) // !Insert && !Delete - rc = (Txfp->SkipRecord(g, false) == RC_FX || Txfp->RecordPos(g)); - - } // endif Header - - return rc; - } // end of SkipHeader - -/***********************************************************************/ -/* ReadBuffer: Physical read routine for the CSV access method. */ -/***********************************************************************/ -int TDBCSV::ReadBuffer(PGLOBAL g) - { - char *p1, *p2, *p = NULL; - int i, n, len, rc = Txfp->ReadBuffer(g); - bool bad = false; - - if (trace > 1) - htrc("CSV: Row is '%s' rc=%d\n", To_Line, rc); - - if (rc != RC_OK || !Fields) - return rc; - else - p2 = To_Line; - - // Find the offsets and lengths of the columns for this row - for (i = 0; i < Fields; i++) { - if (!bad) { - if (Qot && *p2 == Qot) { // Quoted field - for (n = 0, p1 = ++p2; (p = strchr(p1, Qot)); p1 = p + 2) - if (*(p + 1) == Qot) - n++; // Doubled internal quotes - else - break; // Final quote - - if (p) { - len = p++ - p2; - -// if (Sep != ' ') -// for (; *p == ' '; p++) ; // Skip blanks - - if (*p != Sep && i != Fields - 1) { // Should be the separator - if (CheckErr()) { - sprintf(g->Message, MSG(MISSING_FIELD), - i+1, Name, RowNumber(g)); - return RC_FX; - } else if (Accept) - bad = true; - else - return RC_NF; - - } // endif p - - if (n) { - int j, k; - - // Suppress the double of internal quotes - for (j = k = 0; j < len; j++, k++) { - if (p2[j] == Qot) - j++; // skip first one - - p2[k] = p2[j]; - } // endfor i, j - - len -= n; - } // endif n - - } else if (CheckErr()) { - sprintf(g->Message, MSG(BAD_QUOTE_FIELD), - Name, i+1, RowNumber(g)); - return RC_FX; - } else if (Accept) { - len = strlen(p2); - bad = true; - } else - return RC_NF; - - } else if ((p = strchr(p2, Sep))) - len = p - p2; - else if (i == Fields - 1) - len = strlen(p2); - else if (Accept && Maxerr == 0) { - len = strlen(p2); - bad = true; - } else if (CheckErr()) { - sprintf(g->Message, MSG(MISSING_FIELD), i+1, Name, RowNumber(g)); - return RC_FX; - } else if (Accept) { - len = strlen(p2); - bad = true; - } else - return RC_NF; - - } else - len = 0; - - Offset[i] = p2 - To_Line; - - if (Mode != MODE_UPDATE) - Fldlen[i] = len; - else if (len > Fldlen[i]) { - sprintf(g->Message, MSG(FIELD_TOO_LONG), i+1, RowNumber(g)); - return RC_FX; - } else { - strncpy(Field[i], p2, len); - Field[i][len] = '\0'; - } // endif Mode - - if (p) - p2 = p + 1; - - } // endfor i - - return rc; - } // end of ReadBuffer - -/***********************************************************************/ -/* Data Base write routine CSV file access method. */ -/***********************************************************************/ -int TDBCSV::WriteDB(PGLOBAL g) - { - char sep[2], qot[2]; - int i, nlen, oldlen = strlen(To_Line); - - if (trace > 1) - htrc("CSV WriteDB: R%d Mode=%d key=%p link=%p\n", - Tdb_No, Mode, To_Key_Col, To_Link); - - // Before writing the line we must check its length - if ((nlen = CheckWrite(g)) < 0) - return RC_FX; - - // Before writing the line we must make it - sep[0] = Sep; - sep[1] = '\0'; - qot[0] = Qot; - qot[1] = '\0'; - *To_Line = '\0'; - - for (i = 0; i < Fields; i++) { - if (i) - strcat(To_Line, sep); - - if (Field[i]) - if (!strlen(Field[i])) { - // Generally null fields are not quoted - if (Quoted > 2) - // Except if explicitely required - strcat(strcat(To_Line, qot), qot); - - } else if (Qot && (strchr(Field[i], Sep) || *Field[i] == Qot - || Quoted > 1 || (Quoted == 1 && !Fldtyp[i]))) - if (strchr(Field[i], Qot)) { - // Field contains quotes that must be doubled - int j, k = strlen(To_Line), n = strlen(Field[i]); - - To_Line[k++] = Qot; - - for (j = 0; j < n; j++) { - if (Field[i][j] == Qot) - To_Line[k++] = Qot; - - To_Line[k++] = Field[i][j]; - } // endfor j - - To_Line[k++] = Qot; - To_Line[k] = '\0'; - } else - strcat(strcat(strcat(To_Line, qot), Field[i]), qot); - - else - strcat(To_Line, Field[i]); - - } // endfor i - -#if defined(_DEBUG) - assert ((unsigned)nlen == strlen(To_Line)); -#endif - - if (Mode == MODE_UPDATE && nlen < oldlen - && !((PDOSFAM)Txfp)->GetUseTemp()) { - // In Update mode with no temp file, line length must not change - To_Line[nlen] = Sep; - - for (nlen++; nlen < oldlen; nlen++) - To_Line[nlen] = ' '; - - To_Line[nlen] = '\0'; - } // endif - - if (trace > 1) - htrc("Write: line is=%s", To_Line); - - /*********************************************************************/ - /* Now start the writing process. */ - /*********************************************************************/ - return Txfp->WriteBuffer(g); - } // end of WriteDB - -/***********************************************************************/ -/* Check whether a new line fit in the file lrecl size. */ -/***********************************************************************/ -int TDBCSV::CheckWrite(PGLOBAL g) - { - int maxlen, n, nlen = (Fields - 1); - - if (trace > 1) - htrc("CheckWrite: R%d Mode=%d\n", Tdb_No, Mode); - - // Before writing the line we must check its length - maxlen = (Mode == MODE_UPDATE && !Txfp->GetUseTemp()) - ? strlen(To_Line) : Lrecl; - - // Check whether record is too int - for (int i = 0; i < Fields; i++) - if (Field[i]) { - if (!(n = strlen(Field[i]))) - n += (Quoted > 2 ? 2 : 0); - else if (strchr(Field[i], Sep) || (Qot && *Field[i] == Qot) - || Quoted > 1 || (Quoted == 1 && !Fldtyp[i])) - if (!Qot) { - sprintf(g->Message, MSG(SEP_IN_FIELD), i + 1); - return -1; - } else { - // Quotes inside a quoted field must be doubled - char *p1, *p2; - - for (p1 = Field[i]; (p2 = strchr(p1, Qot)); p1 = p2 + 1) - n++; - - n += 2; // Outside quotes - } // endif - - if ((nlen += n) > maxlen) { - strcpy(g->Message, MSG(LINE_TOO_LONG)); - return -1; - } // endif nlen - - } // endif Field - - return nlen; - } // end of CheckWrite - -/* ------------------------------------------------------------------- */ - -/***********************************************************************/ -/* Implementation of the TDBFMT class. */ -/***********************************************************************/ -TDBFMT::TDBFMT(PGLOBAL g, PTDBFMT tdbp) : TDBCSV(g, tdbp) - { - FldFormat = tdbp->FldFormat; - To_Fld = tdbp->To_Fld; - FmtTest = tdbp->FmtTest; - Linenum = tdbp->Linenum; - } // end of TDBFMT copy constructor - -// Method -PTDB TDBFMT::CopyOne(PTABS t) - { - PTDB tp; - PCSVCOL cp1, cp2; -//PFMTCOL cp1, cp2; - PGLOBAL g = t->G; // Is this really useful ??? - - tp = new(g) TDBFMT(g, this); - - for (cp1 = (PCSVCOL)Columns; cp1; cp1 = (PCSVCOL)cp1->GetNext()) { -//for (cp1 = (PFMTCOL)Columns; cp1; cp1 = (PFMTCOL)cp1->GetNext()) { - cp2 = new(g) CSVCOL(cp1, tp); // Make a copy -// cp2 = new(g) FMTCOL(cp1, tp); // Make a copy - NewPointer(t, cp1, cp2); - } // endfor cp1 - - return tp; - } // end of CopyOne - -/***********************************************************************/ -/* Allocate FMT column description block. */ -/***********************************************************************/ -PCOL TDBFMT::MakeCol(PGLOBAL g, PCOLDEF cdp, PCOL cprec, int n) - { - return new(g) CSVCOL(g, cdp, this, cprec, n); -//return new(g) FMTCOL(cdp, this, cprec, n); - } // end of MakeCol - -/***********************************************************************/ -/* FMT EstimatedLength. Returns an estimated minimum line length. */ -/* The big problem here is how can we astimated that minimum ? */ -/***********************************************************************/ -int TDBFMT::EstimatedLength(PGLOBAL g) - { - // This is rather stupid !!! - return ((PDOSDEF)To_Def)->GetEnding() + (int)((Lrecl / 10) + 1); - } // end of EstimatedLength - -/***********************************************************************/ -/* FMT Access Method opening routine. */ -/***********************************************************************/ -bool TDBFMT::OpenDB(PGLOBAL g) - { - Linenum = 0; - - if (Use != USE_OPEN && (Columns || Mode == MODE_UPDATE)) { - // Make the formats used to read records - PSZ pfm; - int i, n; - PCSVCOL colp; - PCOLDEF cdp; - PDOSDEF tdp = (PDOSDEF)To_Def; - -// if (Mode != MODE_UPDATE) { - for (colp = (PCSVCOL)Columns; colp; colp = (PCSVCOL)colp->Next) - if (!colp->IsSpecial()) // Not a pseudo column - Fields = max(Fields, (int)colp->Fldnum); - - if (Columns) - Fields++; // Fldnum was 0 based - -// } else -// for (cdp = tdp->GetCols(); cdp; cdp = cdp->GetNext()) -// Fields++; - - To_Fld = PlugSubAlloc(g, NULL, Lrecl + 1); - FldFormat = (PSZ*)PlugSubAlloc(g, NULL, sizeof(PSZ) * Fields); - memset(FldFormat, 0, sizeof(PSZ) * Fields); - FmtTest = (int*)PlugSubAlloc(g, NULL, sizeof(int) * Fields); - memset(FmtTest, 0, sizeof(int) * Fields); - - // Get the column formats - for (cdp = tdp->GetCols(); cdp; cdp = cdp->GetNext()) - if ((i = cdp->GetOffset() - 1) < Fields) { - if (!(pfm = cdp->GetFmt())) { - sprintf(g->Message, MSG(NO_FLD_FORMAT), i + 1, Name); - return true; - } // endif pfm - - // Roughly check the Fmt format - if ((n = strlen(pfm) - 2) < 4) { - sprintf(g->Message, MSG(BAD_FLD_FORMAT), i + 1, Name); - return true; - } // endif n - - FldFormat[i] = (PSZ)PlugSubAlloc(g, NULL, n + 5); - strcpy(FldFormat[i], pfm); - - if (!strcmp(pfm + n, "%m")) { - // This is a field that can be missing. Flag it so it can - // be handled with special processing. - FldFormat[i][n+1] = 'n'; // To have sscanf normal processing - FmtTest[i] = 2; - } else if (i+1 < Fields && strcmp(pfm + n, "%n")) { - // There are trailing characters after the field contents - // add a marker for the next field start position. - strcat(FldFormat[i], "%n"); - FmtTest[i] = 1; - } // endif's - - } // endif i - - } // endif Use - - return TDBCSV::OpenDB(g); - } // end of OpenDB - -/***********************************************************************/ -/* ReadBuffer: Physical read routine for the FMT access method. */ -/***********************************************************************/ -int TDBFMT::ReadBuffer(PGLOBAL g) - { - int i, len, n, deb, fin, nwp, pos = 0, rc; - bool bad = false; - - if ((rc = Txfp->ReadBuffer(g)) != RC_OK || !Fields) - return rc; - else - ++Linenum; - - if (trace > 1) - htrc("FMT: Row %d is '%s' rc=%d\n", Linenum, To_Line, rc); - - // Find the offsets and lengths of the columns for this row - for (i = 0; i < Fields; i++) { - if (!bad) { - deb = fin = -1; - - if (!FldFormat[i]) { - n = 0; - } else if (FmtTest[i] == 1) { - nwp = -1; - n = sscanf(To_Line + pos, FldFormat[i], &deb, To_Fld, &fin, &nwp); - } else { - n = sscanf(To_Line + pos, FldFormat[i], &deb, To_Fld, &fin); - - if (n != 1 && (deb >= 0 || i == Fields - 1) && FmtTest[i] == 2) { - // Missing optional field, not an error - n = 1; - - if (i == Fields - 1) - fin = deb = 0; - else - fin = deb; - - } // endif n - - nwp = fin; - } // endif i - - if (n != 1 || deb < 0 || fin < 0 || nwp < 0) { - // This is to avoid a very strange sscanf bug occuring - // with fields that ends with a null character. - // This bug causes subsequent sscanf to return in error, - // so next lines are not parsed correctly. - sscanf("a", "%*c"); // Seems to reset things Ok - - if (CheckErr()) { - sprintf(g->Message, MSG(BAD_LINEFLD_FMT), Linenum, i + 1, Name); - return RC_FX; - } else if (Accept) - bad = true; - else - return RC_NF; - - } // endif n... - - } // endif !bad - - if (!bad) { - Offset[i] = pos + deb; - len = fin - deb; - } else { - nwp = 0; - Offset[i] = pos; - len = 0; - } // endif bad - -// if (Mode != MODE_UPDATE) - Fldlen[i] = len; -// else if (len > Fldlen[i]) { -// sprintf(g->Message, MSG(FIELD_TOO_LONG), i+1, To_Tdb->RowNumber(g)); -// return RC_FX; -// } else { -// strncpy(Field[i], To_Line + pos, len); -// Field[i][len] = '\0'; -// } // endif Mode - - pos += nwp; - } // endfor i - - if (bad) - Nerr++; - else - sscanf("a", "%*c"); // Seems to reset things Ok - - return rc; - } // end of ReadBuffer - -/***********************************************************************/ -/* Data Base write routine FMT file access method. */ -/***********************************************************************/ -int TDBFMT::WriteDB(PGLOBAL g) - { - sprintf(g->Message, MSG(FMT_WRITE_NIY), "FMT"); - return RC_FX; // NIY - } // end of WriteDB - -// ------------------------ CSVCOL functions ---------------------------- - -/***********************************************************************/ -/* CSVCOL public constructor */ -/***********************************************************************/ -CSVCOL::CSVCOL(PGLOBAL g, PCOLDEF cdp, PTDB tdbp, PCOL cprec, int i) - : DOSCOL(g, cdp, tdbp, cprec, i, "CSV") - { - Fldnum = Deplac - 1; - Deplac = 0; - } // end of CSVCOL constructor - -/***********************************************************************/ -/* CSVCOL constructor used for copying columns. */ -/* tdbp is the pointer to the new table descriptor. */ -/***********************************************************************/ -CSVCOL::CSVCOL(CSVCOL *col1, PTDB tdbp) : DOSCOL(col1, tdbp) - { - Fldnum = col1->Fldnum; - } // end of CSVCOL copy constructor - -/***********************************************************************/ -/* VarSize: This function tells UpdateDB whether or not the block */ -/* optimization file must be redone if this column is updated, even */ -/* it is not sorted or clustered. This applies to a blocked table, */ -/* because if it is updated using a temporary file, the block size */ -/* may be modified. */ -/***********************************************************************/ -bool CSVCOL::VarSize(void) - { - PTXF txfp = ((PTDBCSV)To_Tdb)->Txfp; - - if (txfp->IsBlocked() && txfp->GetUseTemp()) - // Blocked table using a temporary file - return true; - else - return false; - - } // end VarSize - -/***********************************************************************/ -/* ReadColumn: call DOSCOL::ReadColumn after having set the offet */ -/* and length of the field to read as calculated by TDBCSV::ReadDB. */ -/***********************************************************************/ -void CSVCOL::ReadColumn(PGLOBAL g) - { - int rc; - PTDBCSV tdbp = (PTDBCSV)To_Tdb; - - /*********************************************************************/ - /* If physical reading of the line was deferred, do it now. */ - /*********************************************************************/ - if (!tdbp->IsRead()) - if ((rc = tdbp->ReadBuffer(g)) != RC_OK) { - if (rc == RC_EF) - sprintf(g->Message, MSG(INV_DEF_READ), rc); - - longjmp(g->jumper[g->jump_level], 34); - } // endif - - if (tdbp->Mode != MODE_UPDATE) { - int colen = Long; // Column length - - // Set the field offset and length for this row - Deplac = tdbp->Offset[Fldnum]; // Field offset - Long = tdbp->Fldlen[Fldnum]; // Field length - - if (trace > 1) - htrc("CSV ReadColumn %s Fldnum=%d offset=%d fldlen=%d\n", - Name, Fldnum, Deplac, Long); - - if (Long > colen && tdbp->CheckErr()) { - Long = colen; // Restore column length - sprintf(g->Message, MSG(FLD_TOO_LNG_FOR), - Fldnum + 1, Name, To_Tdb->RowNumber(g), tdbp->GetFile(g)); - longjmp(g->jumper[g->jump_level], 34); - } // endif Long - - // Now do the reading - DOSCOL::ReadColumn(g); - - // Restore column length - Long = colen; - } else { // Mode Update - // Field have been copied in TDB Field array - PSZ fp = tdbp->Field[Fldnum]; - - Value->SetValue_psz(fp); - } // endif Mode - - } // end of ReadColumn - -/***********************************************************************/ -/* WriteColumn: The column is written in TDBCSV matching Field. */ -/***********************************************************************/ -void CSVCOL::WriteColumn(PGLOBAL g) - { - char *p, buf[32]; - int flen; - PTDBCSV tdbp = (PTDBCSV)To_Tdb; - - if (trace > 1) - htrc("CSV WriteColumn: col %s R%d coluse=%.4X status=%.4X\n", - Name, tdbp->GetTdb_No(), ColUse, Status); - - flen = GetLength(); - - if (trace > 1) - htrc("Lrecl=%d Long=%d field=%d coltype=%d colval=%p\n", - tdbp->Lrecl, Long, flen, Buf_Type, Value); - - /*********************************************************************/ - /* Check whether the new value has to be converted to Buf_Type. */ - /*********************************************************************/ - if (Value != To_Val) - Value->SetValue_pval(To_Val, false); // Convert the updated value - - /*********************************************************************/ - /* Get the string representation of the column value. */ - /*********************************************************************/ - p = Value->ShowValue(buf); - - if (trace > 1) - htrc("new length(%p)=%d\n", p, strlen(p)); - - if ((signed)strlen(p) > flen) { - sprintf(g->Message, MSG(BAD_FLD_LENGTH), Name, p, flen); - longjmp(g->jumper[g->jump_level], 34); - } // endif - - if (trace > 1) - htrc("buffer=%s\n", p); - - /*********************************************************************/ - /* Updating must be done also during the first pass so writing the */ - /* updated record can be checked for acceptable record length. */ - /*********************************************************************/ - if (Fldnum < 0) { - // This can happen for wrong offset value in XDB files - sprintf(g->Message, MSG(BAD_FIELD_RANK), Fldnum + 1, Name); - longjmp(g->jumper[g->jump_level], 34); - } else - strncpy(tdbp->Field[Fldnum], p, flen); - - if (trace > 1) - htrc(" col written: '%s'\n", p); - - } // end of WriteColumn - -/* ------------------------ End of TabFmt ---------------------------- */ +/************* TabFmt C++ Program Source Code File (.CPP) **************/ +/* PROGRAM NAME: TABFMT */ +/* ------------- */ +/* Version 3.7 */ +/* */ +/* COPYRIGHT: */ +/* ---------- */ +/* (C) Copyright to the author Olivier BERTRAND 2001 - 2013 */ +/* */ +/* WHAT THIS PROGRAM DOES: */ +/* ----------------------- */ +/* This program are the TABFMT classes DB execution routines. */ +/* The base class CSV is comma separated files. */ +/* FMT (Formatted) files are those having a complex internal record */ +/* format described in the Format keyword of their definition. */ +/***********************************************************************/ + +/***********************************************************************/ +/* Include relevant MariaDB header file. */ +/***********************************************************************/ +#include "my_global.h" + +#if defined(WIN32) +#include +#include +#include +#include +#if defined(__BORLANDC__) +#define __MFC_COMPAT__ // To define min/max as macro +#endif +//#include +#include "osutil.h" +#else +#if defined(UNIX) +#include +#include +#include "osutil.h" +#else +#include +#endif +#include +#endif + +/***********************************************************************/ +/* Include application header files: */ +/* global.h is header containing all global declarations. */ +/* plgdbsem.h is header containing the DB application declarations. */ +/* tabdos.h is header containing the TABDOS class declarations. */ +/***********************************************************************/ +#include "global.h" +#include "plgdbsem.h" +#include "filamap.h" +#if defined(ZIP_SUPPORT) +#include "filamzip.h" +#endif // ZIP_SUPPORT +#include "tabfmt.h" +#include "tabmul.h" +#define NO_FUNC +#include "plgcnx.h" // For DB types +#include "resource.h" + +/***********************************************************************/ +/* This should be an option. */ +/***********************************************************************/ +#define MAXCOL 200 /* Default max column nb in result */ +#define TYPE_UNKNOWN 10 /* Must be greater than other types */ + +extern "C" int trace; + +/***********************************************************************/ +/* CSV Catalog utility functions. */ +/***********************************************************************/ +PQRYRES PlgAllocResult(PGLOBAL, int, int, int, int *, int *, + unsigned int *, bool blank = true, bool nonull = false); + +/***********************************************************************/ +/* CSVColumns: constructs the result blocks containing the description */ +/* of all the columns of a CSV file that will be retrieved by #GetData.*/ +/* Note: the algorithm to set the type is based on the internal values */ +/* of types (TYPE_STRING < TYPE_FLOAT < TYPE_INT) (1 < 2 < 7). */ +/* If these values are changed, this will have to be revisited. */ +/***********************************************************************/ +PQRYRES CSVColumns(PGLOBAL g, char *fn, char sep, char q, int hdr, int mxr) + { + static int dbtype[] = {DB_CHAR, DB_SHORT, DB_CHAR, + DB_INT, DB_INT, DB_SHORT}; + static int buftyp[] = {TYPE_STRING, TYPE_SHORT, TYPE_STRING, + TYPE_INT, TYPE_INT, TYPE_SHORT}; + static unsigned int length[] = {6, 6, 8, 10, 10, 6}; + char *p, *colname[MAXCOL], dechar, filename[_MAX_PATH], buf[4096]; + int i, imax, hmax, n, nerr, phase, blank, digit, dec, type; + int ncol = sizeof(dbtype) / sizeof(int); + int num_read = 0, num_max = 10000000; // Statistics + int len[MAXCOL], typ[MAXCOL], prc[MAXCOL]; + FILE *infile; + PQRYRES qrp; + PCOLRES crp; + +// num_max = atoi(p+1); // Max num of record to test +#if defined(WIN32) + if (strnicmp(setlocale(LC_NUMERIC, NULL), "French", 6)) + dechar = '.'; + else + dechar = ','; +#else // !WIN32 + dechar = '.'; +#endif // !WIN32 + + if (trace) + htrc("File %s sep=%c q=%c hdr=%d mxr=%d\n", + SVP(fn), sep, q, hdr, mxr); + + if (!fn) { + strcpy(g->Message, MSG(MISSING_FNAME)); + return NULL; + } // endif fn + + imax = hmax = nerr = 0; + mxr = max(0, mxr); + + for (i = 0; i < MAXCOL; i++) { + colname[i] = NULL; + len[i] = 0; + typ[i] = TYPE_UNKNOWN; + prc[i] = 0; + } // endfor i + + /*********************************************************************/ + /* Open the input file. */ + /*********************************************************************/ + PlugSetPath(filename, fn, PlgGetDataPath(g)); + + if (!(infile= global_fopen(g, MSGID_CANNOT_OPEN, filename, "r"))) + return NULL; + + if (hdr) { + /*******************************************************************/ + /* Make the column names from the first line. */ + /*******************************************************************/ + phase = 0; + + if (fgets(buf, sizeof(buf), infile)) { + n = strlen(buf) + 1; + buf[n - 2] = '\0'; +#if defined(UNIX) + // The file can be imported from Windows + if (buf[n - 3] == '\r') + buf[n - 3] = 0; +#endif // UNIX + p = (char*)PlugSubAlloc(g, NULL, n); + memcpy(p, buf, n); + + //skip leading blanks + for (; *p == ' '; p++) ; + + if (q && *p == q) { + // Header is quoted + p++; + phase = 1; + } // endif q + + colname[0] = p; + } else { + sprintf(g->Message, MSG(FILE_IS_EMPTY), fn); + goto err; + } // endif's + + for (i = 1; *p; p++) + if (phase == 1 && *p == q) { + *p = '\0'; + phase = 0; + } else if (*p == sep && !phase) { + *p = '\0'; + + //skip leading blanks + for (; *(p+1) == ' '; p++) ; + + if (q && *(p+1) == q) { + // Header is quoted + p++; + phase = 1; + } // endif q + + colname[i++] = p + 1; + } // endif sep + + num_read++; + imax = hmax = i; + + for (i = 0; i < hmax; i++) + length[0] = max(length[0], strlen(colname[i])); + + } // endif hdr + + for (num_read++; num_read <= num_max; num_read++) { + /*******************************************************************/ + /* Now start the reading process. Read one line. */ + /*******************************************************************/ + if (fgets(buf, sizeof(buf), infile)) { + n = strlen(buf); + buf[n - 1] = '\0'; +#if defined(UNIX) + // The file can be imported from Windows + if (buf[n - 2] == '\r') + buf[n - 2] = 0; +#endif // UNIX + } else if (feof(infile)) { + sprintf(g->Message, MSG(EOF_AFTER_LINE), num_read -1); + break; + } else { + sprintf(g->Message, MSG(ERR_READING_REC), num_read, fn); + goto err; + } // endif's + + /*******************************************************************/ + /* Make the test for field lengths. */ + /*******************************************************************/ + i = n = phase = blank = digit = dec = 0; + + for (p = buf; *p; p++) + if (*p == sep) { + if (phase != 1) { + if (i == MAXCOL - 1) { + sprintf(g->Message, MSG(TOO_MANY_FIELDS), num_read, fn); + goto err; + } // endif i + + if (n) { + len[i] = max(len[i], n); + type = (digit || (dec && n == 1)) ? TYPE_STRING + : (dec) ? TYPE_FLOAT : TYPE_INT; + typ[i] = min(type, typ[i]); + prc[i] = max((typ[i] == TYPE_FLOAT) ? (dec - 1) : 0, prc[i]); + } // endif n + + i++; + n = phase = blank = digit = dec = 0; + } else // phase == 1 + n++; + + } else if (*p == ' ') { + if (phase < 2) + n++; + + if (blank) + digit = 1; + + } else if (*p == q) { + if (phase == 0) { + if (blank) + if (++nerr > mxr) { + sprintf(g->Message, MSG(MISPLACED_QUOTE), num_read); + goto err; + } else + goto skip; + + n = 0; + phase = digit = 1; + } else if (phase == 1) { + if (*(p+1) == q) { + // This is currently not implemented for CSV tables +// if (++nerr > mxr) { +// sprintf(g->Message, MSG(QUOTE_IN_QUOTE), num_read); +// goto err; +// } else +// goto skip; + + p++; + n++; + } else + phase = 2; + + } else if (++nerr > mxr) { // phase == 2 + sprintf(g->Message, MSG(MISPLACED_QUOTE), num_read); + goto err; + } else + goto skip; + + } else { + if (phase == 2) + if (++nerr > mxr) { + sprintf(g->Message, MSG(MISPLACED_QUOTE), num_read); + goto err; + } else + goto skip; + + // isdigit cannot be used here because of debug assert + if (!strchr("0123456789", *p)) { + if (!digit && *p == dechar) + dec = 1; // Decimal point found + else if (blank || !(*p == '-' || *p == '+')) + digit = 1; + + } else if (dec) + dec++; // More decimals + + n++; + blank = 1; + } // endif's *p + + if (phase == 1) + if (++nerr > mxr) { + sprintf(g->Message, MSG(UNBALANCE_QUOTE), num_read); + goto err; + } else + goto skip; + + if (n) { + len[i] = max(len[i], n); + type = (digit || n == 0 || (dec && n == 1)) ? TYPE_STRING + : (dec) ? TYPE_FLOAT : TYPE_INT; + typ[i] = min(type, typ[i]); + prc[i] = max((typ[i] == TYPE_FLOAT) ? (dec - 1) : 0, prc[i]); + } // endif n + + imax = max(imax, i+1); + skip: ; // Skip erroneous line + } // endfor num_read + + if (trace) { + htrc("imax=%d Lengths:", imax); + + for (i = 0; i < imax; i++) + htrc(" %d", len[i]); + + htrc("\n"); + } // endif trace + + fclose(infile); + + if (trace) + htrc("CSVColumns: imax=%d hmax=%d len=%d\n", + imax, hmax, length[0]); + + /*********************************************************************/ + /* Allocate the structures used to refer to the result set. */ + /*********************************************************************/ + qrp = PlgAllocResult(g, ncol, imax, IDS_COLUMNS + 3, + dbtype, buftyp, length); + qrp->Nblin = imax; + + /*********************************************************************/ + /* Now get the results into blocks. */ + /*********************************************************************/ + for (i = 0; i < imax; i++) { + if (i >= hmax) { + sprintf(buf, "COL%.3d", i+1); + p = buf; + } else + p = colname[i]; + + if (typ[i] == TYPE_UNKNOWN) // Void column + typ[i] = TYPE_STRING; + + crp = qrp->Colresp; // Column Name + crp->Kdata->SetValue(p, i); + crp = crp->Next; // Data Type + crp->Kdata->SetValue(typ[i], i); + crp = crp->Next; // Type Name + crp->Kdata->SetValue(GetTypeName(typ[i]), i); + crp = crp->Next; // Precision + crp->Kdata->SetValue(len[i], i); + crp = crp->Next; // Length + crp->Kdata->SetValue(len[i], i); + crp = crp->Next; // Scale (precision) + crp->Kdata->SetValue(prc[i], i); + } // endfor i + + /*********************************************************************/ + /* Return the result pointer for use by GetData routines. */ + /*********************************************************************/ + return qrp; + + err: + fclose(infile); + return NULL; + } // end of CSVCColumns + +/* --------------------------- Class CSVDEF -------------------------- */ + +/***********************************************************************/ +/* CSVDEF constructor. */ +/***********************************************************************/ +CSVDEF::CSVDEF(void) + { + Fmtd = Accept = Header = false; + Maxerr = 0; + Quoted = -1; + Sep = ','; + Qot = '\0'; + } // end of CSVDEF constructor + +/***********************************************************************/ +/* DefineAM: define specific AM block values from XDB file. */ +/***********************************************************************/ +bool CSVDEF::DefineAM(PGLOBAL g, LPCSTR am, int poff) + { + char buf[8]; + + // Double check correctness of offset values + for (PCOLDEF cdp = To_Cols; cdp; cdp = cdp->GetNext()) + if (cdp->GetOffset() < 1) { + strcpy(g->Message, MSG(BAD_OFFSET_VAL)); + return true; + } // endif Offset + + // Call DOSDEF DefineAM with am=CSV so FMT is not confused with FIX + if (DOSDEF::DefineAM(g, "CSV", poff)) + return true; + + Cat->GetCharCatInfo(Name, "Separator", ",", buf, sizeof(buf)); + Sep = (strlen(buf) == 2 && buf[0] == '\\' && buf[1] == 't') ? '\t' : *buf; + Quoted = Cat->GetIntCatInfo(Name, "Quoted", -1); + Cat->GetCharCatInfo(Name, "Qchar", "", buf, sizeof(buf)); + Qot = *buf; + + if (Qot && Quoted < 0) + Quoted = 0; + else if (!Qot && Quoted >= 0) + Qot = '"'; + + Fmtd = (!Sep || (am && (*am == 'F' || *am == 'f'))); + Header = (Cat->GetIntCatInfo(Name, "Header", 0) != 0); + Maxerr = Cat->GetIntCatInfo(Name, "Maxerr", 0); + Accept = (Cat->GetIntCatInfo(Name, "Accept", 0) != 0); + return false; + } // end of DefineAM + +/***********************************************************************/ +/* GetTable: makes a new Table Description Block. */ +/***********************************************************************/ +PTDB CSVDEF::GetTable(PGLOBAL g, MODE mode) + { + USETEMP tmp = PlgGetUser(g)->UseTemp; + bool map = Mapped && mode != MODE_INSERT && + !(tmp != TMP_NO && mode == MODE_UPDATE) && + !(tmp == TMP_FORCE && + (mode == MODE_UPDATE || mode == MODE_DELETE)); + PTXF txfp; + PTDBASE tdbp; + + /*********************************************************************/ + /* Allocate a file processing class of the proper type. */ + /*********************************************************************/ + if (map) { + // Should be now compatible with UNIX + txfp = new(g) MAPFAM(this); + } else if (Compressed) { +#if defined(ZIP_SUPPORT) + if (Compressed == 1) + txfp = new(g) ZIPFAM(this); + else { + strcpy(g->Message, "Compress 2 not supported yet"); +// txfp = new(g) ZLBFAM(defp); + return NULL; + } // endelse +#else // !ZIP_SUPPORT + strcpy(g->Message, "Compress not supported"); + return NULL; +#endif // !ZIP_SUPPORT + } else + txfp = new(g) DOSFAM(this); + + /*********************************************************************/ + /* Allocate a TDB of the proper type. */ + /* Column blocks will be allocated only when needed. */ + /*********************************************************************/ + if (!Fmtd) + tdbp = new(g) TDBCSV(this, txfp); + else + tdbp = new(g) TDBFMT(this, txfp); + + if (Multiple) + tdbp = new(g) TDBMUL(tdbp); + + return tdbp; + } // end of GetTable + +/* -------------------------- Class TDBCSV --------------------------- */ + +/***********************************************************************/ +/* Implementation of the TDBCSV class. */ +/***********************************************************************/ +TDBCSV::TDBCSV(PCSVDEF tdp, PTXF txfp) : TDBDOS(tdp, txfp) + { +#if defined(_DEBUG) + assert (tdp); +#endif + Field = NULL; + Offset = NULL; + Fldlen = NULL; + Fields = 0; + Nerr = 0; + Quoted = tdp->Quoted; + Maxerr = tdp->Maxerr; + Accept = tdp->Accept; + Header = tdp->Header; + Sep = tdp->GetSep(); + Qot = tdp->GetQot(); + } // end of TDBCSV standard constructor + +TDBCSV::TDBCSV(PGLOBAL g, PTDBCSV tdbp) : TDBDOS(g, tdbp) + { + Fields = tdbp->Fields; + + if (Fields) { + if (tdbp->Offset) + Offset = (int*)PlugSubAlloc(g, NULL, sizeof(int) * Fields); + + if (tdbp->Fldlen) + Fldlen = (int*)PlugSubAlloc(g, NULL, sizeof(int) * Fields); + + Field = (PSZ *)PlugSubAlloc(g, NULL, sizeof(PSZ) * Fields); + + for (int i = 0; i < Fields; i++) { + if (Offset) + Offset[i] = tdbp->Offset[i]; + + if (Fldlen) + Fldlen[i] = tdbp->Fldlen[i]; + + if (Field) { + assert (Fldlen); + Field[i] = (PSZ)PlugSubAlloc(g, NULL, Fldlen[i] + 1); + Field[i][Fldlen[i]] = '\0'; + } // endif Field + + } // endfor i + + } else { + Field = NULL; + Offset = NULL; + Fldlen = NULL; + } // endif Fields + + Nerr = tdbp->Nerr; + Maxerr = tdbp->Maxerr; + Quoted = tdbp->Quoted; + Accept = tdbp->Accept; + Header = tdbp->Header; + Sep = tdbp->Sep; + Qot = tdbp->Qot; + } // end of TDBCSV copy constructor + +// Method +PTDB TDBCSV::CopyOne(PTABS t) + { + PTDB tp; + PCSVCOL cp1, cp2; + PGLOBAL g = t->G; // Is this really useful ??? + + tp = new(g) TDBCSV(g, this); + + for (cp1 = (PCSVCOL)Columns; cp1; cp1 = (PCSVCOL)cp1->GetNext()) { + cp2 = new(g) CSVCOL(cp1, tp); // Make a copy + NewPointer(t, cp1, cp2); + } // endfor cp1 + + return tp; + } // end of CopyOne + +/***********************************************************************/ +/* Allocate CSV column description block. */ +/***********************************************************************/ +PCOL TDBCSV::MakeCol(PGLOBAL g, PCOLDEF cdp, PCOL cprec, int n) + { + return new(g) CSVCOL(g, cdp, this, cprec, n); + } // end of MakeCol + +/***********************************************************************/ +/* Check whether the number of errors is greater than the maximum. */ +/***********************************************************************/ +bool TDBCSV::CheckErr(void) + { + return (++Nerr) > Maxerr; + } // end of CheckErr + +/***********************************************************************/ +/* CSV EstimatedLength. Returns an estimated minimum line length. */ +/***********************************************************************/ +int TDBCSV::EstimatedLength(PGLOBAL g) + { + if (trace) + htrc("EstimatedLength: Fields=%d Columns=%p\n", Fields, Columns); + + if (!Fields) { + PCSVCOL colp; + + for (colp = (PCSVCOL)Columns; colp; colp = (PCSVCOL)colp->Next) + if (!colp->IsSpecial()) // Not a pseudo column + Fields = max(Fields, (int)colp->Fldnum); + + if (Columns) + Fields++; // Fldnum was 0 based + + } // endif Fields + + return (int)Fields; // Number of separators if all fields are null + } // end of Estimated Length + +#if 0 +/***********************************************************************/ +/* CSV tables favor the use temporary files for Update. */ +/***********************************************************************/ +bool TDBCSV::IsUsingTemp(PGLOBAL g) + { + USETEMP usetemp = PlgGetUser(g)->UseTemp; + + return (usetemp == TMP_YES || usetemp == TMP_FORCE || + (usetemp == TMP_AUTO && Mode == MODE_UPDATE)); + } // end of IsUsingTemp +#endif // 0 (Same as TDBDOS one) + +/***********************************************************************/ +/* CSV Access Method opening routine. */ +/* First allocate the Offset and Fldlen arrays according to the */ +/* greatest field used in that query. Then call the DOS opening fnc. */ +/***********************************************************************/ +bool TDBCSV::OpenDB(PGLOBAL g) + { + bool rc = false; + PCOLDEF cdp; + PDOSDEF tdp = (PDOSDEF)To_Def; + + if (Use != USE_OPEN && (Columns || Mode == MODE_UPDATE)) { + // Allocate the storage used to read (or write) records + int i, len; + PCSVCOL colp; + + if (!Fields) // May have been set in TABFMT::OpenDB + if (Mode != MODE_UPDATE && Mode != MODE_INSERT) { + for (colp = (PCSVCOL)Columns; colp; colp = (PCSVCOL)colp->Next) + if (!colp->IsSpecial()) // Not a pseudo column + Fields = max(Fields, (int)colp->Fldnum); + + if (Columns) + Fields++; // Fldnum was 0 based + + } else + for (cdp = tdp->GetCols(); cdp; cdp = cdp->GetNext()) + Fields++; + + Offset = (int*)PlugSubAlloc(g, NULL, sizeof(int) * Fields); + Fldlen = (int*)PlugSubAlloc(g, NULL, sizeof(int) * Fields); + + if (Mode == MODE_INSERT || Mode == MODE_UPDATE) { + Field = (PSZ*)PlugSubAlloc(g, NULL, sizeof(PSZ) * Fields); + Fldtyp = (bool*)PlugSubAlloc(g, NULL, sizeof(bool) * Fields); + } // endif Mode + + for (i = 0; i < Fields; i++) { + Offset[i] = 0; + Fldlen[i] = 0; + + if (Field) { + Field[i] = NULL; + Fldtyp[i] = false; + } // endif Field + + } // endfor i + + if (Field) + // Prepare writing fields + if (Mode != MODE_UPDATE) + for (colp = (PCSVCOL)Columns; colp; colp = (PCSVCOL)colp->Next) { + i = colp->Fldnum; + len = colp->GetLength(); + Field[i] = (PSZ)PlugSubAlloc(g, NULL, len + 1); + Field[i][len] = '\0'; + Fldlen[i] = len; + Fldtyp[i] = IsTypeNum(colp->GetResultType()); + } // endfor colp + + else // MODE_UPDATE + for (cdp = tdp->GetCols(); cdp; cdp = cdp->GetNext()) { + i = cdp->GetOffset() - 1; + len = cdp->GetLength(); + Field[i] = (PSZ)PlugSubAlloc(g, NULL, len + 1); + Field[i][len] = '\0'; + Fldlen[i] = len; + Fldtyp[i] = IsTypeNum(cdp->GetType()); + } // endfor colp + + } // endif Use + + if (Header) { + // Check that the Lrecl is at least equal to the header line length + int headlen = 0; + PCOLDEF cdp; + PDOSDEF tdp = (PDOSDEF)To_Def; + + for (cdp = tdp->GetCols(); cdp; cdp = cdp->GetNext()) + headlen += strlen(cdp->GetName()) + 3; // 3 if names are quoted + + if (headlen > Lrecl) { + Lrecl = headlen; + Txfp->Lrecl = headlen; + } // endif headlen + + } // endif Header + + Nerr = 0; + rc = TDBDOS::OpenDB(g); + + if (!rc && Mode == MODE_UPDATE && To_Kindex) + // Because KINDEX::Init is executed in mode READ, we must restore + // the Fldlen array that was modified when reading the table file. + for (cdp = tdp->GetCols(); cdp; cdp = cdp->GetNext()) + Fldlen[cdp->GetOffset() - 1] = cdp->GetLength(); + + return rc; + } // end of OpenDB + +/***********************************************************************/ +/* SkipHeader: Physically skip first header line if applicable. */ +/* This is called from TDBDOS::OpenDB and must be executed before */ +/* Kindex construction if the file is accessed using an index. */ +/***********************************************************************/ +bool TDBCSV::SkipHeader(PGLOBAL g) + { + int len = GetFileLength(g); + bool rc = false; + +#if defined(_DEBUG) + if (len < 0) + return true; +#endif // _DEBUG + + if (Header) { + if (Mode == MODE_INSERT) { + if (!len) { + // New file, the header line must be constructed and written + int i, n = 0; + int hlen = 0; + bool q = Qot && Quoted > 0; + PCOLDEF cdp; + + // Estimate the length of the header list + for (cdp = To_Def->GetCols(); cdp; cdp = cdp->GetNext()) { + hlen += (1 + strlen(cdp->GetName())); + hlen += ((q) ? 2 : 0); + n++; // Calculate the number of columns + } // endfor cdp + + if (hlen > Lrecl) { + sprintf(g->Message, MSG(LRECL_TOO_SMALL), hlen); + return true; + } // endif hlen + + // File is empty, write a header record + memset(To_Line, 0, Lrecl); + + // The column order in the file is given by the offset value + for (i = 1; i <= n; i++) + for (cdp = To_Def->GetCols(); cdp; cdp = cdp->GetNext()) + if (cdp->GetOffset() == i) { + if (q) + To_Line[strlen(To_Line)] = Qot; + + strcat(To_Line, cdp->GetName()); + + if (q) + To_Line[strlen(To_Line)] = Qot; + + if (i < n) + To_Line[strlen(To_Line)] = Sep; + + } // endif Offset + + rc = (Txfp->WriteBuffer(g) == RC_FX); + } // endif !FileLength + + } else if (Mode == MODE_DELETE) { + if (len) + rc = (Txfp->SkipRecord(g, true) == RC_FX); + + } else if (len) // !Insert && !Delete + rc = (Txfp->SkipRecord(g, false) == RC_FX || Txfp->RecordPos(g)); + + } // endif Header + + return rc; + } // end of SkipHeader + +/***********************************************************************/ +/* ReadBuffer: Physical read routine for the CSV access method. */ +/***********************************************************************/ +int TDBCSV::ReadBuffer(PGLOBAL g) + { + char *p1, *p2, *p = NULL; + int i, n, len, rc = Txfp->ReadBuffer(g); + bool bad = false; + + if (trace > 1) + htrc("CSV: Row is '%s' rc=%d\n", To_Line, rc); + + if (rc != RC_OK || !Fields) + return rc; + else + p2 = To_Line; + + // Find the offsets and lengths of the columns for this row + for (i = 0; i < Fields; i++) { + if (!bad) { + if (Qot && *p2 == Qot) { // Quoted field + for (n = 0, p1 = ++p2; (p = strchr(p1, Qot)); p1 = p + 2) + if (*(p + 1) == Qot) + n++; // Doubled internal quotes + else + break; // Final quote + + if (p) { + len = p++ - p2; + +// if (Sep != ' ') +// for (; *p == ' '; p++) ; // Skip blanks + + if (*p != Sep && i != Fields - 1) { // Should be the separator + if (CheckErr()) { + sprintf(g->Message, MSG(MISSING_FIELD), + i+1, Name, RowNumber(g)); + return RC_FX; + } else if (Accept) + bad = true; + else + return RC_NF; + + } // endif p + + if (n) { + int j, k; + + // Suppress the double of internal quotes + for (j = k = 0; j < len; j++, k++) { + if (p2[j] == Qot) + j++; // skip first one + + p2[k] = p2[j]; + } // endfor i, j + + len -= n; + } // endif n + + } else if (CheckErr()) { + sprintf(g->Message, MSG(BAD_QUOTE_FIELD), + Name, i+1, RowNumber(g)); + return RC_FX; + } else if (Accept) { + len = strlen(p2); + bad = true; + } else + return RC_NF; + + } else if ((p = strchr(p2, Sep))) + len = p - p2; + else if (i == Fields - 1) + len = strlen(p2); + else if (Accept && Maxerr == 0) { + len = strlen(p2); + bad = true; + } else if (CheckErr()) { + sprintf(g->Message, MSG(MISSING_FIELD), i+1, Name, RowNumber(g)); + return RC_FX; + } else if (Accept) { + len = strlen(p2); + bad = true; + } else + return RC_NF; + + } else + len = 0; + + Offset[i] = p2 - To_Line; + + if (Mode != MODE_UPDATE) + Fldlen[i] = len; + else if (len > Fldlen[i]) { + sprintf(g->Message, MSG(FIELD_TOO_LONG), i+1, RowNumber(g)); + return RC_FX; + } else { + strncpy(Field[i], p2, len); + Field[i][len] = '\0'; + } // endif Mode + + if (p) + p2 = p + 1; + + } // endfor i + + return rc; + } // end of ReadBuffer + +/***********************************************************************/ +/* Data Base write routine CSV file access method. */ +/***********************************************************************/ +int TDBCSV::WriteDB(PGLOBAL g) + { + char sep[2], qot[2]; + int i, nlen, oldlen = strlen(To_Line); + + if (trace > 1) + htrc("CSV WriteDB: R%d Mode=%d key=%p link=%p\n", + Tdb_No, Mode, To_Key_Col, To_Link); + + // Before writing the line we must check its length + if ((nlen = CheckWrite(g)) < 0) + return RC_FX; + + // Before writing the line we must make it + sep[0] = Sep; + sep[1] = '\0'; + qot[0] = Qot; + qot[1] = '\0'; + *To_Line = '\0'; + + for (i = 0; i < Fields; i++) { + if (i) + strcat(To_Line, sep); + + if (Field[i]) + if (!strlen(Field[i])) { + // Generally null fields are not quoted + if (Quoted > 2) + // Except if explicitely required + strcat(strcat(To_Line, qot), qot); + + } else if (Qot && (strchr(Field[i], Sep) || *Field[i] == Qot + || Quoted > 1 || (Quoted == 1 && !Fldtyp[i]))) + if (strchr(Field[i], Qot)) { + // Field contains quotes that must be doubled + int j, k = strlen(To_Line), n = strlen(Field[i]); + + To_Line[k++] = Qot; + + for (j = 0; j < n; j++) { + if (Field[i][j] == Qot) + To_Line[k++] = Qot; + + To_Line[k++] = Field[i][j]; + } // endfor j + + To_Line[k++] = Qot; + To_Line[k] = '\0'; + } else + strcat(strcat(strcat(To_Line, qot), Field[i]), qot); + + else + strcat(To_Line, Field[i]); + + } // endfor i + +#if defined(_DEBUG) + assert ((unsigned)nlen == strlen(To_Line)); +#endif + + if (Mode == MODE_UPDATE && nlen < oldlen + && !((PDOSFAM)Txfp)->GetUseTemp()) { + // In Update mode with no temp file, line length must not change + To_Line[nlen] = Sep; + + for (nlen++; nlen < oldlen; nlen++) + To_Line[nlen] = ' '; + + To_Line[nlen] = '\0'; + } // endif + + if (trace > 1) + htrc("Write: line is=%s", To_Line); + + /*********************************************************************/ + /* Now start the writing process. */ + /*********************************************************************/ + return Txfp->WriteBuffer(g); + } // end of WriteDB + +/***********************************************************************/ +/* Check whether a new line fit in the file lrecl size. */ +/***********************************************************************/ +int TDBCSV::CheckWrite(PGLOBAL g) + { + int maxlen, n, nlen = (Fields - 1); + + if (trace > 1) + htrc("CheckWrite: R%d Mode=%d\n", Tdb_No, Mode); + + // Before writing the line we must check its length + maxlen = (Mode == MODE_UPDATE && !Txfp->GetUseTemp()) + ? strlen(To_Line) : Lrecl; + + // Check whether record is too int + for (int i = 0; i < Fields; i++) + if (Field[i]) { + if (!(n = strlen(Field[i]))) + n += (Quoted > 2 ? 2 : 0); + else if (strchr(Field[i], Sep) || (Qot && *Field[i] == Qot) + || Quoted > 1 || (Quoted == 1 && !Fldtyp[i])) + if (!Qot) { + sprintf(g->Message, MSG(SEP_IN_FIELD), i + 1); + return -1; + } else { + // Quotes inside a quoted field must be doubled + char *p1, *p2; + + for (p1 = Field[i]; (p2 = strchr(p1, Qot)); p1 = p2 + 1) + n++; + + n += 2; // Outside quotes + } // endif + + if ((nlen += n) > maxlen) { + strcpy(g->Message, MSG(LINE_TOO_LONG)); + return -1; + } // endif nlen + + } // endif Field + + return nlen; + } // end of CheckWrite + +/* ------------------------------------------------------------------- */ + +/***********************************************************************/ +/* Implementation of the TDBFMT class. */ +/***********************************************************************/ +TDBFMT::TDBFMT(PGLOBAL g, PTDBFMT tdbp) : TDBCSV(g, tdbp) + { + FldFormat = tdbp->FldFormat; + To_Fld = tdbp->To_Fld; + FmtTest = tdbp->FmtTest; + Linenum = tdbp->Linenum; + } // end of TDBFMT copy constructor + +// Method +PTDB TDBFMT::CopyOne(PTABS t) + { + PTDB tp; + PCSVCOL cp1, cp2; +//PFMTCOL cp1, cp2; + PGLOBAL g = t->G; // Is this really useful ??? + + tp = new(g) TDBFMT(g, this); + + for (cp1 = (PCSVCOL)Columns; cp1; cp1 = (PCSVCOL)cp1->GetNext()) { +//for (cp1 = (PFMTCOL)Columns; cp1; cp1 = (PFMTCOL)cp1->GetNext()) { + cp2 = new(g) CSVCOL(cp1, tp); // Make a copy +// cp2 = new(g) FMTCOL(cp1, tp); // Make a copy + NewPointer(t, cp1, cp2); + } // endfor cp1 + + return tp; + } // end of CopyOne + +/***********************************************************************/ +/* Allocate FMT column description block. */ +/***********************************************************************/ +PCOL TDBFMT::MakeCol(PGLOBAL g, PCOLDEF cdp, PCOL cprec, int n) + { + return new(g) CSVCOL(g, cdp, this, cprec, n); +//return new(g) FMTCOL(cdp, this, cprec, n); + } // end of MakeCol + +/***********************************************************************/ +/* FMT EstimatedLength. Returns an estimated minimum line length. */ +/* The big problem here is how can we astimated that minimum ? */ +/***********************************************************************/ +int TDBFMT::EstimatedLength(PGLOBAL g) + { + // This is rather stupid !!! + return ((PDOSDEF)To_Def)->GetEnding() + (int)((Lrecl / 10) + 1); + } // end of EstimatedLength + +/***********************************************************************/ +/* FMT Access Method opening routine. */ +/***********************************************************************/ +bool TDBFMT::OpenDB(PGLOBAL g) + { + Linenum = 0; + + if (Use != USE_OPEN && (Columns || Mode == MODE_UPDATE)) { + // Make the formats used to read records + PSZ pfm; + int i, n; + PCSVCOL colp; + PCOLDEF cdp; + PDOSDEF tdp = (PDOSDEF)To_Def; + +// if (Mode != MODE_UPDATE) { + for (colp = (PCSVCOL)Columns; colp; colp = (PCSVCOL)colp->Next) + if (!colp->IsSpecial()) // Not a pseudo column + Fields = max(Fields, (int)colp->Fldnum); + + if (Columns) + Fields++; // Fldnum was 0 based + +// } else +// for (cdp = tdp->GetCols(); cdp; cdp = cdp->GetNext()) +// Fields++; + + To_Fld = PlugSubAlloc(g, NULL, Lrecl + 1); + FldFormat = (PSZ*)PlugSubAlloc(g, NULL, sizeof(PSZ) * Fields); + memset(FldFormat, 0, sizeof(PSZ) * Fields); + FmtTest = (int*)PlugSubAlloc(g, NULL, sizeof(int) * Fields); + memset(FmtTest, 0, sizeof(int) * Fields); + + // Get the column formats + for (cdp = tdp->GetCols(); cdp; cdp = cdp->GetNext()) + if ((i = cdp->GetOffset() - 1) < Fields) { + if (!(pfm = cdp->GetFmt())) { + sprintf(g->Message, MSG(NO_FLD_FORMAT), i + 1, Name); + return true; + } // endif pfm + + // Roughly check the Fmt format + if ((n = strlen(pfm) - 2) < 4) { + sprintf(g->Message, MSG(BAD_FLD_FORMAT), i + 1, Name); + return true; + } // endif n + + FldFormat[i] = (PSZ)PlugSubAlloc(g, NULL, n + 5); + strcpy(FldFormat[i], pfm); + + if (!strcmp(pfm + n, "%m")) { + // This is a field that can be missing. Flag it so it can + // be handled with special processing. + FldFormat[i][n+1] = 'n'; // To have sscanf normal processing + FmtTest[i] = 2; + } else if (i+1 < Fields && strcmp(pfm + n, "%n")) { + // There are trailing characters after the field contents + // add a marker for the next field start position. + strcat(FldFormat[i], "%n"); + FmtTest[i] = 1; + } // endif's + + } // endif i + + } // endif Use + + return TDBCSV::OpenDB(g); + } // end of OpenDB + +/***********************************************************************/ +/* ReadBuffer: Physical read routine for the FMT access method. */ +/***********************************************************************/ +int TDBFMT::ReadBuffer(PGLOBAL g) + { + int i, len, n, deb, fin, nwp, pos = 0, rc; + bool bad = false; + + if ((rc = Txfp->ReadBuffer(g)) != RC_OK || !Fields) + return rc; + else + ++Linenum; + + if (trace > 1) + htrc("FMT: Row %d is '%s' rc=%d\n", Linenum, To_Line, rc); + + // Find the offsets and lengths of the columns for this row + for (i = 0; i < Fields; i++) { + if (!bad) { + deb = fin = -1; + + if (!FldFormat[i]) { + n = 0; + } else if (FmtTest[i] == 1) { + nwp = -1; + n = sscanf(To_Line + pos, FldFormat[i], &deb, To_Fld, &fin, &nwp); + } else { + n = sscanf(To_Line + pos, FldFormat[i], &deb, To_Fld, &fin); + + if (n != 1 && (deb >= 0 || i == Fields - 1) && FmtTest[i] == 2) { + // Missing optional field, not an error + n = 1; + + if (i == Fields - 1) + fin = deb = 0; + else + fin = deb; + + } // endif n + + nwp = fin; + } // endif i + + if (n != 1 || deb < 0 || fin < 0 || nwp < 0) { + // This is to avoid a very strange sscanf bug occuring + // with fields that ends with a null character. + // This bug causes subsequent sscanf to return in error, + // so next lines are not parsed correctly. + sscanf("a", "%*c"); // Seems to reset things Ok + + if (CheckErr()) { + sprintf(g->Message, MSG(BAD_LINEFLD_FMT), Linenum, i + 1, Name); + return RC_FX; + } else if (Accept) + bad = true; + else + return RC_NF; + + } // endif n... + + } // endif !bad + + if (!bad) { + Offset[i] = pos + deb; + len = fin - deb; + } else { + nwp = 0; + Offset[i] = pos; + len = 0; + } // endif bad + +// if (Mode != MODE_UPDATE) + Fldlen[i] = len; +// else if (len > Fldlen[i]) { +// sprintf(g->Message, MSG(FIELD_TOO_LONG), i+1, To_Tdb->RowNumber(g)); +// return RC_FX; +// } else { +// strncpy(Field[i], To_Line + pos, len); +// Field[i][len] = '\0'; +// } // endif Mode + + pos += nwp; + } // endfor i + + if (bad) + Nerr++; + else + sscanf("a", "%*c"); // Seems to reset things Ok + + return rc; + } // end of ReadBuffer + +/***********************************************************************/ +/* Data Base write routine FMT file access method. */ +/***********************************************************************/ +int TDBFMT::WriteDB(PGLOBAL g) + { + sprintf(g->Message, MSG(FMT_WRITE_NIY), "FMT"); + return RC_FX; // NIY + } // end of WriteDB + +// ------------------------ CSVCOL functions ---------------------------- + +/***********************************************************************/ +/* CSVCOL public constructor */ +/***********************************************************************/ +CSVCOL::CSVCOL(PGLOBAL g, PCOLDEF cdp, PTDB tdbp, PCOL cprec, int i) + : DOSCOL(g, cdp, tdbp, cprec, i, "CSV") + { + Fldnum = Deplac - 1; + Deplac = 0; + } // end of CSVCOL constructor + +/***********************************************************************/ +/* CSVCOL constructor used for copying columns. */ +/* tdbp is the pointer to the new table descriptor. */ +/***********************************************************************/ +CSVCOL::CSVCOL(CSVCOL *col1, PTDB tdbp) : DOSCOL(col1, tdbp) + { + Fldnum = col1->Fldnum; + } // end of CSVCOL copy constructor + +/***********************************************************************/ +/* VarSize: This function tells UpdateDB whether or not the block */ +/* optimization file must be redone if this column is updated, even */ +/* it is not sorted or clustered. This applies to a blocked table, */ +/* because if it is updated using a temporary file, the block size */ +/* may be modified. */ +/***********************************************************************/ +bool CSVCOL::VarSize(void) + { + PTXF txfp = ((PTDBCSV)To_Tdb)->Txfp; + + if (txfp->IsBlocked() && txfp->GetUseTemp()) + // Blocked table using a temporary file + return true; + else + return false; + + } // end VarSize + +/***********************************************************************/ +/* ReadColumn: call DOSCOL::ReadColumn after having set the offet */ +/* and length of the field to read as calculated by TDBCSV::ReadDB. */ +/***********************************************************************/ +void CSVCOL::ReadColumn(PGLOBAL g) + { + int rc; + PTDBCSV tdbp = (PTDBCSV)To_Tdb; + + /*********************************************************************/ + /* If physical reading of the line was deferred, do it now. */ + /*********************************************************************/ + if (!tdbp->IsRead()) + if ((rc = tdbp->ReadBuffer(g)) != RC_OK) { + if (rc == RC_EF) + sprintf(g->Message, MSG(INV_DEF_READ), rc); + + longjmp(g->jumper[g->jump_level], 34); + } // endif + + if (tdbp->Mode != MODE_UPDATE) { + int colen = Long; // Column length + + // Set the field offset and length for this row + Deplac = tdbp->Offset[Fldnum]; // Field offset + Long = tdbp->Fldlen[Fldnum]; // Field length + + if (trace > 1) + htrc("CSV ReadColumn %s Fldnum=%d offset=%d fldlen=%d\n", + Name, Fldnum, Deplac, Long); + + if (Long > colen && tdbp->CheckErr()) { + Long = colen; // Restore column length + sprintf(g->Message, MSG(FLD_TOO_LNG_FOR), + Fldnum + 1, Name, To_Tdb->RowNumber(g), tdbp->GetFile(g)); + longjmp(g->jumper[g->jump_level], 34); + } // endif Long + + // Now do the reading + DOSCOL::ReadColumn(g); + + // Restore column length + Long = colen; + } else { // Mode Update + // Field have been copied in TDB Field array + PSZ fp = tdbp->Field[Fldnum]; + + Value->SetValue_psz(fp); + } // endif Mode + + } // end of ReadColumn + +/***********************************************************************/ +/* WriteColumn: The column is written in TDBCSV matching Field. */ +/***********************************************************************/ +void CSVCOL::WriteColumn(PGLOBAL g) + { + char *p, buf[32]; + int flen; + PTDBCSV tdbp = (PTDBCSV)To_Tdb; + + if (trace > 1) + htrc("CSV WriteColumn: col %s R%d coluse=%.4X status=%.4X\n", + Name, tdbp->GetTdb_No(), ColUse, Status); + + flen = GetLength(); + + if (trace > 1) + htrc("Lrecl=%d Long=%d field=%d coltype=%d colval=%p\n", + tdbp->Lrecl, Long, flen, Buf_Type, Value); + + /*********************************************************************/ + /* Check whether the new value has to be converted to Buf_Type. */ + /*********************************************************************/ + if (Value != To_Val) + Value->SetValue_pval(To_Val, false); // Convert the updated value + + /*********************************************************************/ + /* Get the string representation of the column value. */ + /*********************************************************************/ + p = Value->ShowValue(buf); + + if (trace > 1) + htrc("new length(%p)=%d\n", p, strlen(p)); + + if ((signed)strlen(p) > flen) { + sprintf(g->Message, MSG(BAD_FLD_LENGTH), Name, p, flen); + longjmp(g->jumper[g->jump_level], 34); + } // endif + + if (trace > 1) + htrc("buffer=%s\n", p); + + /*********************************************************************/ + /* Updating must be done also during the first pass so writing the */ + /* updated record can be checked for acceptable record length. */ + /*********************************************************************/ + if (Fldnum < 0) { + // This can happen for wrong offset value in XDB files + sprintf(g->Message, MSG(BAD_FIELD_RANK), Fldnum + 1, Name); + longjmp(g->jumper[g->jump_level], 34); + } else + strncpy(tdbp->Field[Fldnum], p, flen); + + if (trace > 1) + htrc(" col written: '%s'\n", p); + + } // end of WriteColumn + +/* ------------------------ End of TabFmt ---------------------------- */ -- cgit v1.2.1