spark-reviews mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mgaido91 <...@git.apache.org>
Subject [GitHub] spark pull request #19224: [SPARK-20990][SQL] Read all JSON documents in fil...
Date Tue, 16 Jan 2018 19:14:12 GMT
Github user mgaido91 commented on a diff in the pull request:

    https://github.com/apache/spark/pull/19224#discussion_r161858020
  
    --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala
---
    @@ -361,3 +361,78 @@ class JacksonParser(
         }
       }
     }
    +
    +object JacksonParser {
    +  private[spark] def splitDocuments(input: InputStream) = new Iterator[String] {
    +
    +    private implicit class JsonCharacter(char: Char) {
    +      def isJsonObjectFinished(endToken: Option[Char]): Boolean = {
    +        endToken match {
    +          case None => char == '}' || char == ']'
    +          case Some(x) => char == x
    +        }
    +      }
    +    }
    +    private var currentChar: Char = input.read().toChar
    +    private var previousToken: Option[Char] = None
    +    private var nextRecord = readNext
    +
    +    override def hasNext: Boolean = nextRecord.isDefined
    +
    +    override def next(): String = {
    +      if (!hasNext) {
    +        throw new NoSuchElementException("End of stream")
    +      }
    +      val curRecord = nextRecord.get
    +      nextRecord = readNext
    +      curRecord
    +    }
    +
    +    private def moveToNextChar() = {
    +      if (!currentChar.isWhitespace) {
    +        previousToken = Some(currentChar)
    +      }
    +      currentChar = input.read().toChar
    +    }
    +
    +    private def readJsonObject: Option[String] = {
    +      val endToken = currentChar match {
    +        case '{' => Some('}')
    +        case '[' => Some(']')
    +        case _ => None
    +      }
    +
    +      val sb = new StringBuilder()
    +      sb.append(currentChar)
    +      while (!currentChar.isJsonObjectFinished(endToken) && input.available()
> 0) {
    +        moveToNextChar()
    +        currentChar match {
    +          case '{' | '[' =>
    --- End diff --
    
    Yes, but then escapes would also have to be taken into account, and so on. We would then
nearly have to rewrite the Jackson library's parsing logic, which is not desirable. This is the
reason why I said that. Sure, I would be happy if we can find a solution together, but I think
it is hard with this approach.


---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org


Mime
View raw message