openoffice-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From "Dennis E. Hamilton" <dennis.hamil...@acm.org>
Subject RE: svn commit: r1739628 - in /openoffice/trunk/main: connectivity/source/drivers/flat/ETable.cxx tools/source/stream/stream.cxx
Date Sun, 17 Apr 2016 20:34:51 GMT
Does the rule about using "" to make a single quote inside a quoted field also apply?

 - Dennis

> -----Original Message-----
> From: damjan@apache.org [mailto:damjan@apache.org]
> Sent: Sunday, April 17, 2016 09:45
> To: commits@openoffice.apache.org
> Subject: svn commit: r1739628 - in /openoffice/trunk/main:
> connectivity/source/drivers/flat/ETable.cxx
> tools/source/stream/stream.cxx
> 
> Author: damjan
> Date: Sun Apr 17 16:44:43 2016
> New Revision: 1739628
> 
> URL: http://svn.apache.org/viewvc?rev=1739628&view=rev
> Log:
> Make CSV line parsers consistent with CSV field parsers.
> 
> Our CSV field parsing algorithms treat fields starting with a quote
> (immediately at the beginning of the row, or after the field delimiter)
> as
> quoted. A quoted field ends at the corresponding closing quote, and any
> remaining text between the closing quote and the next field delimiter or
> end
> of line is appended to the text already extracted from the field, but
> not
> processed further. Any quotes in this extra text are taken verbatim -
> they
> do not quote anything.
> 
> Our CSV line parsers were big hacks - they essentially read and
> concatenate
> lines until an even number of quote characters is found, and then feed
> this
> through the CSV field parsers.
> 
> This patch rewrites the line parsers to work exactly how the field
> parsers
> work. Text such as:
> "another" ",something else
> is now correctly parsed by both Calc and Base as:
> [another "],[something else]
> instead of breaking all further parsing.
> 
> Patch by: me
> 
> 
> Modified:
>     openoffice/trunk/main/connectivity/source/drivers/flat/ETable.cxx
>     openoffice/trunk/main/tools/source/stream/stream.cxx
> 
> Modified:
> openoffice/trunk/main/connectivity/source/drivers/flat/ETable.cxx
> URL:
> http://svn.apache.org/viewvc/openoffice/trunk/main/connectivity/source/d
> rivers/flat/ETable.cxx?rev=1739628&r1=1739627&r2=1739628&view=diff
> ========================================================================
> ======
> --- openoffice/trunk/main/connectivity/source/drivers/flat/ETable.cxx
> (original)
> +++ openoffice/trunk/main/connectivity/source/drivers/flat/ETable.cxx
> Sun Apr 17 16:44:43 2016
> @@ -907,14 +907,64 @@ sal_Bool OFlatTable::readLine(QuotedToke
>          return sal_False;
> 
>      QuotedTokenizedString sLine = line; // check if the string
> continues on next line
> -    while( (sLine.GetString().GetTokenCount(m_cStringDelimiter) % 2) !=
> 1 )
> +    xub_StrLen nLastOffset = 0;
> +    bool isQuoted = false;
> +    bool isFieldStarting = true;
> +    while (true)
>      {
> -        m_pFileStream->ReadByteStringLine(sLine,nEncoding);
> -        if ( !m_pFileStream->IsEof() )
> +        bool wasQuote = false;
> +        const sal_Unicode *p;
> +        p = sLine.GetString().GetBuffer();
> +        p += nLastOffset;
> +
> +        while (*p)
> +        {
> +            if (isQuoted)
> +            {
> +                if (*p == m_cStringDelimiter)
> +                    wasQuote = !wasQuote;
> +                else
> +                {
> +                    if (wasQuote)
> +                    {
> +                        wasQuote = false;
> +                        isQuoted = false;
> +                        if (*p == m_cFieldDelimiter)
> +                            isFieldStarting = true;
> +                    }
> +                }
> +            }
> +            else
> +            {
> +                if (isFieldStarting)
> +                {
> +                    isFieldStarting = false;
> +                    if (*p == m_cStringDelimiter)
> +                        isQuoted = true;
> +                    else if (*p == m_cFieldDelimiter)
> +                        isFieldStarting = true;
> +                }
> +                else if (*p == m_cFieldDelimiter)
> +                    isFieldStarting = true;
> +            }
> +            ++p;
> +        }
> +
> +        if (wasQuote)
> +            isQuoted = false;
> +
> +        if (isQuoted)
>          {
> -            line.GetString().Append('\n');
> -            line.GetString() += sLine.GetString();
> -            sLine = line;
> +            nLastOffset = sLine.Len();
> +            m_pFileStream->ReadByteStringLine(sLine,nEncoding);
> +            if ( !m_pFileStream->IsEof() )
> +            {
> +                line.GetString().Append('\n');
> +                line.GetString() += sLine.GetString();
> +                sLine = line;
> +            }
> +            else
> +                break;
>          }
>          else
>              break;
> 
> Modified: openoffice/trunk/main/tools/source/stream/stream.cxx
> URL:
> http://svn.apache.org/viewvc/openoffice/trunk/main/tools/source/stream/s
> tream.cxx?rev=1739628&r1=1739627&r2=1739628&view=diff
> ========================================================================
> ======
> --- openoffice/trunk/main/tools/source/stream/stream.cxx (original)
> +++ openoffice/trunk/main/tools/source/stream/stream.cxx Sun Apr 17
> 16:44:43 2016
> @@ -1128,38 +1128,59 @@ sal_Bool SvStream::ReadCsvLine( String&
>      {
>          const sal_Unicode* pSeps = rFieldSeparators.GetBuffer();
>          xub_StrLen nLastOffset = 0;
> -        xub_StrLen nQuotes = 0;
> +        bool isQuoted = false;
> +        bool isFieldStarting = true;
>          while (!IsEof() && rStr.Len() < STRING_MAXLEN)
>          {
> +            bool wasQuote = false;
>              bool bBackslashEscaped = false;
> -            const sal_Unicode *p, *pStart;
> -            p = pStart = rStr.GetBuffer();
> +            const sal_Unicode *p;
> +            p = rStr.GetBuffer();
>              p += nLastOffset;
>              while (*p)
>              {
> -                if (nQuotes)
> +                if (isQuoted)
>                  {
>                      if (*p == cFieldQuote && !bBackslashEscaped)
> -                        ++nQuotes;
> -                    else if (bAllowBackslashEscape)
> +                        wasQuote = !wasQuote;
> +                    else
>                      {
> -                        if (*p == '\\')
> -                            bBackslashEscaped = !bBackslashEscaped;
> -                        else
> -                            bBackslashEscaped = false;
> +                        if (bAllowBackslashEscape)
> +                        {
> +                            if (*p == '\\')
> +                                bBackslashEscaped = !bBackslashEscaped;
> +                            else
> +                                bBackslashEscaped = false;
> +                        }
> +                        if (wasQuote)
> +                        {
> +                            wasQuote = false;
> +                            isQuoted = false;
> +                            if (lcl_UnicodeStrChr( pSeps, *p ))
> +                                isFieldStarting = true;
> +                        }
>                      }
>                  }
> -                else if (*p == cFieldQuote && (p == pStart ||
> -                            lcl_UnicodeStrChr( pSeps, p[-1])))
> -                    nQuotes = 1;
> -                // A quote character inside a field content does not
> start
> -                // a quote.
> +                else
> +                {
> +                    if (isFieldStarting)
> +                    {
> +                        isFieldStarting = false;
> +                        if (*p == cFieldQuote)
> +                            isQuoted = true;
> +                        else if (lcl_UnicodeStrChr( pSeps, *p ))
> +                            isFieldStarting = true;
> +                    }
> +                    else if (lcl_UnicodeStrChr( pSeps, *p ))
> +                        isFieldStarting = true;
> +                }
>                  ++p;
>              }
> 
> -            if (nQuotes % 2 == 0)
> -                break;
> -            else
> +            if (wasQuote)
> +                isQuoted = false;
> +
> +            if (isQuoted)
>              {
>                  nLastOffset = rStr.Len();
>                  String aNext;
> @@ -1167,6 +1188,8 @@ sal_Bool SvStream::ReadCsvLine( String&
>                  rStr += sal_Unicode(_LF);
>                  rStr += aNext;
>              }
> +            else
> +                break;
>          }
>      }
>      return nError == SVSTREAM_OK;



---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@openoffice.apache.org
For additional commands, e-mail: dev-help@openoffice.apache.org


Mime
View raw message