asterixdb-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jianf...@apache.org
Subject [08/15] incubator-asterixdb git commit: ASTERIXDB-1102: VarSize Encoding to store length of String and ByteArray
Date Thu, 29 Oct 2015 04:44:59 GMT
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-app/src/test/resources/runtimets/results/binary/find/find.1.adm
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/runtimets/results/binary/find/find.1.adm b/asterix-app/src/test/resources/runtimets/results/binary/find/find.1.adm
index 9e639d6..4f811a2 100644
--- a/asterix-app/src/test/resources/runtimets/results/binary/find/find.1.adm
+++ b/asterix-app/src/test/resources/runtimets/results/binary/find/find.1.adm
@@ -1,2 +1,2 @@
-[ [ true, null, null, null ]
+[ [ true, true, true, true, true, true, true, null, null, null ]
  ]

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-app/src/test/resources/runtimets/results/binary/subbinary/subbinary.1.adm
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/runtimets/results/binary/subbinary/subbinary.1.adm b/asterix-app/src/test/resources/runtimets/results/binary/subbinary/subbinary.1.adm
index 975a5f0..d1a46d0 100644
--- a/asterix-app/src/test/resources/runtimets/results/binary/subbinary/subbinary.1.adm
+++ b/asterix-app/src/test/resources/runtimets/results/binary/subbinary/subbinary.1.adm
@@ -1,2 +1,2 @@
-[ true
+[ [ true, true, true, true, true, true, true, true, true, true, true, true, true, true ]
  ]

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-app/src/test/resources/runtimets/results/nested-index-dml/delete-from-loaded-dataset-with-index/delete-from-loaded-dataset-with-index.1.adm
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/runtimets/results/nested-index-dml/delete-from-loaded-dataset-with-index/delete-from-loaded-dataset-with-index.1.adm b/asterix-app/src/test/resources/runtimets/results/nested-index-dml/delete-from-loaded-dataset-with-index/delete-from-loaded-dataset-with-index.1.adm
index 8429702..9b7d762 100644
--- a/asterix-app/src/test/resources/runtimets/results/nested-index-dml/delete-from-loaded-dataset-with-index/delete-from-loaded-dataset-with-index.1.adm
+++ b/asterix-app/src/test/resources/runtimets/results/nested-index-dml/delete-from-loaded-dataset-with-index/delete-from-loaded-dataset-with-index.1.adm
@@ -1,5 +1,5 @@
 [ { "tweetid": "10", "user": { "screen-name": "ColineGeyer@63", "lang": "en", "friends_count": 121, "statuses_count": 362, "name": "Coline Geyer", "followers_count": 17159 }, "sender-location": point("29.15,76.53"), "send-time": datetime("2008-01-26T10:10:00.000Z"), "referred-topics": {{ "verizon", "voice-clarity" }}, "message-text": " hate verizon its voice-clarity is OMG:(" }
-, { "tweetid": "6", "user": { "screen-name": "ColineGeyer@63", "lang": "en", "friends_count": 121, "statuses_count": 362, "name": "Coline Geyer", "followers_count": 17159 }, "sender-location": point("47.51,83.99"), "send-time": datetime("2010-05-07T10:10:00.000Z"), "referred-topics": {{ "iphone", "voice-clarity" }}, "message-text": " like iphone the voice-clarity is good:)" }
 , { "tweetid": "11", "user": { "screen-name": "NilaMilliron_tw", "lang": "en", "friends_count": 445, "statuses_count": 164, "name": "Nila Milliron", "followers_count": 22649 }, "sender-location": point("37.59,68.42"), "send-time": datetime("2008-03-09T10:10:00.000Z"), "referred-topics": {{ "iphone", "platform" }}, "message-text": " can't stand iphone its platform is terrible" }
-, { "tweetid": "2", "user": { "screen-name": "ColineGeyer@63", "lang": "en", "friends_count": 121, "statuses_count": 362, "name": "Coline Geyer", "followers_count": 17159 }, "sender-location": point("32.84,67.14"), "send-time": datetime("2010-05-13T10:10:00.000Z"), "referred-topics": {{ "verizon", "shortcut-menu" }}, "message-text": " like verizon its shortcut-menu is awesome:)" }
+, { "tweetid": "2", "user": { "screen-name": "ColineGeyer@63", "lang": "en", "friends_count": 121, "statuses_count": 362, "name": "Coline Geyer", "followers_count": 17159 }, "sender-location": point("32.84,67.14"), "send-time": datetime("2010-05-13T10:10:00.000Z"), "referred-topics": {{ "verizon", "shortcut-menu" }}, "message-text": " like verizon its shortcut-menu is awesome:)" } 
+, { "tweetid": "6", "user": { "screen-name": "ColineGeyer@63", "lang": "en", "friends_count": 121, "statuses_count": 362, "name": "Coline Geyer", "followers_count": 17159 }, "sender-location": point("47.51,83.99"), "send-time": datetime("2010-05-07T10:10:00.000Z"), "referred-topics": {{ "iphone", "voice-clarity" }}, "message-text": " like iphone the voice-clarity is good:)" }
  ]

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-app/src/test/resources/runtimets/results/nested-index-dml/insert-into-loaded-dataset-with-index_02/insert-into-loaded-dataset-with-index_02.1.adm
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/runtimets/results/nested-index-dml/insert-into-loaded-dataset-with-index_02/insert-into-loaded-dataset-with-index_02.1.adm b/asterix-app/src/test/resources/runtimets/results/nested-index-dml/insert-into-loaded-dataset-with-index_02/insert-into-loaded-dataset-with-index_02.1.adm
index 12c16d1..22b519d 100644
--- a/asterix-app/src/test/resources/runtimets/results/nested-index-dml/insert-into-loaded-dataset-with-index_02/insert-into-loaded-dataset-with-index_02.1.adm
+++ b/asterix-app/src/test/resources/runtimets/results/nested-index-dml/insert-into-loaded-dataset-with-index_02/insert-into-loaded-dataset-with-index_02.1.adm
@@ -1,5 +1,5 @@
 [ { "tweetid": "10", "user": { "screen-name": "ColineGeyer@63", "lang": "en", "friends_count": 121, "statuses_count": 362, "name": "Coline Geyer", "followers_count": 17159 }, "sender-location": point("29.15,76.53"), "send-time": datetime("2008-01-26T10:10:00.000Z"), "referred-topics": {{ "verizon", "voice-clarity" }}, "message-text": " hate verizon its voice-clarity is OMG:(" }
+, { "tweetid": "2", "user": { "screen-name": "ColineGeyer@63", "lang": "en", "friends_count": 121, "statuses_count": 362, "name": "Coline Geyer", "followers_count": 17159 }, "sender-location": point("32.84,67.14"), "send-time": datetime("2010-05-13T10:10:00.000Z"), "referred-topics": {{ "verizon", "shortcut-menu" }}, "message-text": " like verizon its shortcut-menu is awesome:)" }
 , { "tweetid": "6", "user": { "screen-name": "ColineGeyer@63", "lang": "en", "friends_count": 121, "statuses_count": 362, "name": "Coline Geyer", "followers_count": 17159 }, "sender-location": point("47.51,83.99"), "send-time": datetime("2010-05-07T10:10:00.000Z"), "referred-topics": {{ "iphone", "voice-clarity" }}, "message-text": " like iphone the voice-clarity is good:)" }
 , { "tweetid": "7", "user": { "screen-name": "ChangEwing_573", "lang": "en", "friends_count": 182, "statuses_count": 394, "name": "Chang Ewing", "followers_count": 32136 }, "sender-location": point("36.21,72.6"), "send-time": datetime("2011-08-25T10:10:00.000Z"), "referred-topics": {{ "samsung", "platform" }}, "message-text": " like samsung the platform is good" }
-, { "tweetid": "2", "user": { "screen-name": "ColineGeyer@63", "lang": "en", "friends_count": 121, "statuses_count": 362, "name": "Coline Geyer", "followers_count": 17159 }, "sender-location": point("32.84,67.14"), "send-time": datetime("2010-05-13T10:10:00.000Z"), "referred-topics": {{ "verizon", "shortcut-menu" }}, "message-text": " like verizon its shortcut-menu is awesome:)" }
  ]

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-app/src/test/resources/runtimets/results/string/end-with1/end-with1.1.adm
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/runtimets/results/string/end-with1/end-with1.1.adm b/asterix-app/src/test/resources/runtimets/results/string/end-with1/end-with1.1.adm
deleted file mode 100644
index e452ee2..0000000
--- a/asterix-app/src/test/resources/runtimets/results/string/end-with1/end-with1.1.adm
+++ /dev/null
@@ -1,2 +0,0 @@
-[ { "result1": false }
- ]

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-app/src/test/resources/runtimets/results/string/end-with2/end-with2.1.adm
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/runtimets/results/string/end-with2/end-with2.1.adm b/asterix-app/src/test/resources/runtimets/results/string/end-with2/end-with2.1.adm
deleted file mode 100644
index 792013a..0000000
--- a/asterix-app/src/test/resources/runtimets/results/string/end-with2/end-with2.1.adm
+++ /dev/null
@@ -1,2 +0,0 @@
-[ { "result1": true }
- ]

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-app/src/test/resources/runtimets/results/string/end-with3/end-with3.1.adm
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/runtimets/results/string/end-with3/end-with3.1.adm b/asterix-app/src/test/resources/runtimets/results/string/end-with3/end-with3.1.adm
deleted file mode 100644
index 792013a..0000000
--- a/asterix-app/src/test/resources/runtimets/results/string/end-with3/end-with3.1.adm
+++ /dev/null
@@ -1,2 +0,0 @@
-[ { "result1": true }
- ]

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-app/src/test/resources/runtimets/results/string/end-with4/end-with4.1.adm
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/runtimets/results/string/end-with4/end-with4.1.adm b/asterix-app/src/test/resources/runtimets/results/string/end-with4/end-with4.1.adm
deleted file mode 100644
index e452ee2..0000000
--- a/asterix-app/src/test/resources/runtimets/results/string/end-with4/end-with4.1.adm
+++ /dev/null
@@ -1,2 +0,0 @@
-[ { "result1": false }
- ]

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-app/src/test/resources/runtimets/results/string/end-with5/end-with5.1.adm
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/runtimets/results/string/end-with5/end-with5.1.adm b/asterix-app/src/test/resources/runtimets/results/string/end-with5/end-with5.1.adm
deleted file mode 100644
index 3d1cf35..0000000
--- a/asterix-app/src/test/resources/runtimets/results/string/end-with5/end-with5.1.adm
+++ /dev/null
@@ -1,2 +0,0 @@
-[ { "f1": true, "f2": false, "f3": true, "f4": false, "f5": true, "f6": false }
- ]

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-app/src/test/resources/runtimets/results/string/ends-with1/ends-with1.1.adm
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/runtimets/results/string/ends-with1/ends-with1.1.adm b/asterix-app/src/test/resources/runtimets/results/string/ends-with1/ends-with1.1.adm
new file mode 100644
index 0000000..e452ee2
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/results/string/ends-with1/ends-with1.1.adm
@@ -0,0 +1,2 @@
+[ { "result1": false }
+ ]

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-app/src/test/resources/runtimets/results/string/ends-with2/ends-with2.1.adm
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/runtimets/results/string/ends-with2/ends-with2.1.adm b/asterix-app/src/test/resources/runtimets/results/string/ends-with2/ends-with2.1.adm
new file mode 100644
index 0000000..792013a
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/results/string/ends-with2/ends-with2.1.adm
@@ -0,0 +1,2 @@
+[ { "result1": true }
+ ]

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-app/src/test/resources/runtimets/results/string/ends-with3/ends-with3.1.adm
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/runtimets/results/string/ends-with3/ends-with3.1.adm b/asterix-app/src/test/resources/runtimets/results/string/ends-with3/ends-with3.1.adm
new file mode 100644
index 0000000..792013a
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/results/string/ends-with3/ends-with3.1.adm
@@ -0,0 +1,2 @@
+[ { "result1": true }
+ ]

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-app/src/test/resources/runtimets/results/string/ends-with4/ends-with4.1.adm
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/runtimets/results/string/ends-with4/ends-with4.1.adm b/asterix-app/src/test/resources/runtimets/results/string/ends-with4/ends-with4.1.adm
new file mode 100644
index 0000000..e452ee2
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/results/string/ends-with4/ends-with4.1.adm
@@ -0,0 +1,2 @@
+[ { "result1": false }
+ ]

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-app/src/test/resources/runtimets/results/string/ends-with5/ends-with5.1.adm
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/runtimets/results/string/ends-with5/ends-with5.1.adm b/asterix-app/src/test/resources/runtimets/results/string/ends-with5/ends-with5.1.adm
new file mode 100644
index 0000000..7ef4d67
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/results/string/ends-with5/ends-with5.1.adm
@@ -0,0 +1,2 @@
+[ { "f1": true, "f2": null, "f3": null, "f4": null, "f5": true, "f6": null }
+ ]

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-app/src/test/resources/runtimets/results/string/ends-with6/ends-with6.1.adm
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/runtimets/results/string/ends-with6/ends-with6.1.adm b/asterix-app/src/test/resources/runtimets/results/string/ends-with6/ends-with6.1.adm
new file mode 100644
index 0000000..9a8b5dc
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/results/string/ends-with6/ends-with6.1.adm
@@ -0,0 +1,2 @@
+[ [ false, true ]
+ ]

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-app/src/test/resources/runtimets/results/string/ends-with7/ends-with7.1.adm
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/runtimets/results/string/ends-with7/ends-with7.1.adm b/asterix-app/src/test/resources/runtimets/results/string/ends-with7/ends-with7.1.adm
new file mode 100644
index 0000000..f6ec4a3
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/results/string/ends-with7/ends-with7.1.adm
@@ -0,0 +1,7 @@
+[ false
+, false
+, true
+, true
+, true
+, true
+ ]

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-app/src/test/resources/runtimets/results/string/ends-with8/ends-with8.1.adm
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/runtimets/results/string/ends-with8/ends-with8.1.adm b/asterix-app/src/test/resources/runtimets/results/string/ends-with8/ends-with8.1.adm
new file mode 100644
index 0000000..f696fc1
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/results/string/ends-with8/ends-with8.1.adm
@@ -0,0 +1,5 @@
+[ { "name": "I am Jones" }
+, { "name": "Jim Jones" }
+, { "name": "Marian Jones" }
+, { "name": "Phil Jones" }
+ ]

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-app/src/test/resources/runtimets/results/string/ends-with_01/ends-with_01.1.adm
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/runtimets/results/string/ends-with_01/ends-with_01.1.adm b/asterix-app/src/test/resources/runtimets/results/string/ends-with_01/ends-with_01.1.adm
deleted file mode 100644
index 9a8b5dc..0000000
--- a/asterix-app/src/test/resources/runtimets/results/string/ends-with_01/ends-with_01.1.adm
+++ /dev/null
@@ -1,2 +0,0 @@
-[ [ false, true ]
- ]

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-app/src/test/resources/runtimets/results/string/endwith02/endwith02.1.adm
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/runtimets/results/string/endwith02/endwith02.1.adm b/asterix-app/src/test/resources/runtimets/results/string/endwith02/endwith02.1.adm
deleted file mode 100644
index f6ec4a3..0000000
--- a/asterix-app/src/test/resources/runtimets/results/string/endwith02/endwith02.1.adm
+++ /dev/null
@@ -1,7 +0,0 @@
-[ false
-, false
-, true
-, true
-, true
-, true
- ]

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-app/src/test/resources/runtimets/results/string/endwith03/endwith03.1.adm
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/runtimets/results/string/endwith03/endwith03.1.adm b/asterix-app/src/test/resources/runtimets/results/string/endwith03/endwith03.1.adm
deleted file mode 100644
index f696fc1..0000000
--- a/asterix-app/src/test/resources/runtimets/results/string/endwith03/endwith03.1.adm
+++ /dev/null
@@ -1,5 +0,0 @@
-[ { "name": "I am Jones" }
-, { "name": "Jim Jones" }
-, { "name": "Marian Jones" }
-, { "name": "Phil Jones" }
- ]

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-app/src/test/resources/runtimets/results/string/matches11/matches11.1.adm
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/runtimets/results/string/matches11/matches11.1.adm b/asterix-app/src/test/resources/runtimets/results/string/matches11/matches11.1.adm
index 51bc6cf..36fc447 100644
--- a/asterix-app/src/test/resources/runtimets/results/string/matches11/matches11.1.adm
+++ b/asterix-app/src/test/resources/runtimets/results/string/matches11/matches11.1.adm
@@ -1,6 +1,7 @@
-[ false
-, false
+[ null
+, null
 , false
 , false
+, null
 , false
  ]

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-app/src/test/resources/runtimets/results/string/matchesnull/matchesnull.1.adm
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/runtimets/results/string/matchesnull/matchesnull.1.adm b/asterix-app/src/test/resources/runtimets/results/string/matchesnull/matchesnull.1.adm
index 5721c80..3e9f513 100644
--- a/asterix-app/src/test/resources/runtimets/results/string/matchesnull/matchesnull.1.adm
+++ b/asterix-app/src/test/resources/runtimets/results/string/matchesnull/matchesnull.1.adm
@@ -1,2 +1,2 @@
-[ { "result1": false, "result2": false, "result3": true, "result4": false, "result5": false, "result6": true }
+[ { "result1": null, "result2": null, "result3": null, "result4": null, "result5": null, "result6": null }
  ]

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-app/src/test/resources/runtimets/results/string/start-with1/start-with1.1.adm
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/runtimets/results/string/start-with1/start-with1.1.adm b/asterix-app/src/test/resources/runtimets/results/string/start-with1/start-with1.1.adm
deleted file mode 100644
index 792013a..0000000
--- a/asterix-app/src/test/resources/runtimets/results/string/start-with1/start-with1.1.adm
+++ /dev/null
@@ -1,2 +0,0 @@
-[ { "result1": true }
- ]

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-app/src/test/resources/runtimets/results/string/start-with2/start-with2.1.adm
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/runtimets/results/string/start-with2/start-with2.1.adm b/asterix-app/src/test/resources/runtimets/results/string/start-with2/start-with2.1.adm
deleted file mode 100644
index e452ee2..0000000
--- a/asterix-app/src/test/resources/runtimets/results/string/start-with2/start-with2.1.adm
+++ /dev/null
@@ -1,2 +0,0 @@
-[ { "result1": false }
- ]

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-app/src/test/resources/runtimets/results/string/start-with3/start-with3.1.adm
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/runtimets/results/string/start-with3/start-with3.1.adm b/asterix-app/src/test/resources/runtimets/results/string/start-with3/start-with3.1.adm
deleted file mode 100644
index 792013a..0000000
--- a/asterix-app/src/test/resources/runtimets/results/string/start-with3/start-with3.1.adm
+++ /dev/null
@@ -1,2 +0,0 @@
-[ { "result1": true }
- ]

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-app/src/test/resources/runtimets/results/string/start-with4/start-with4.1.adm
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/runtimets/results/string/start-with4/start-with4.1.adm b/asterix-app/src/test/resources/runtimets/results/string/start-with4/start-with4.1.adm
deleted file mode 100644
index 3d1cf35..0000000
--- a/asterix-app/src/test/resources/runtimets/results/string/start-with4/start-with4.1.adm
+++ /dev/null
@@ -1,2 +0,0 @@
-[ { "f1": true, "f2": false, "f3": true, "f4": false, "f5": true, "f6": false }
- ]

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-app/src/test/resources/runtimets/results/string/start-with5/start-with5.1.adm
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/runtimets/results/string/start-with5/start-with5.1.adm b/asterix-app/src/test/resources/runtimets/results/string/start-with5/start-with5.1.adm
deleted file mode 100644
index e452ee2..0000000
--- a/asterix-app/src/test/resources/runtimets/results/string/start-with5/start-with5.1.adm
+++ /dev/null
@@ -1,2 +0,0 @@
-[ { "result1": false }
- ]

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-app/src/test/resources/runtimets/results/string/starts-with1/starts-with1.1.adm
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/runtimets/results/string/starts-with1/starts-with1.1.adm b/asterix-app/src/test/resources/runtimets/results/string/starts-with1/starts-with1.1.adm
new file mode 100644
index 0000000..792013a
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/results/string/starts-with1/starts-with1.1.adm
@@ -0,0 +1,2 @@
+[ { "result1": true }
+ ]

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-app/src/test/resources/runtimets/results/string/starts-with2/starts-with2.1.adm
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/runtimets/results/string/starts-with2/starts-with2.1.adm b/asterix-app/src/test/resources/runtimets/results/string/starts-with2/starts-with2.1.adm
new file mode 100644
index 0000000..e452ee2
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/results/string/starts-with2/starts-with2.1.adm
@@ -0,0 +1,2 @@
+[ { "result1": false }
+ ]

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-app/src/test/resources/runtimets/results/string/starts-with3/starts-with3.1.adm
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/runtimets/results/string/starts-with3/starts-with3.1.adm b/asterix-app/src/test/resources/runtimets/results/string/starts-with3/starts-with3.1.adm
new file mode 100644
index 0000000..792013a
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/results/string/starts-with3/starts-with3.1.adm
@@ -0,0 +1,2 @@
+[ { "result1": true }
+ ]

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-app/src/test/resources/runtimets/results/string/starts-with4/starts-with4.1.adm
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/runtimets/results/string/starts-with4/starts-with4.1.adm b/asterix-app/src/test/resources/runtimets/results/string/starts-with4/starts-with4.1.adm
new file mode 100644
index 0000000..7ef4d67
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/results/string/starts-with4/starts-with4.1.adm
@@ -0,0 +1,2 @@
+[ { "f1": true, "f2": null, "f3": null, "f4": null, "f5": true, "f6": null }
+ ]

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-app/src/test/resources/runtimets/results/string/starts-with5/starts-with5.1.adm
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/runtimets/results/string/starts-with5/starts-with5.1.adm b/asterix-app/src/test/resources/runtimets/results/string/starts-with5/starts-with5.1.adm
new file mode 100644
index 0000000..e452ee2
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/results/string/starts-with5/starts-with5.1.adm
@@ -0,0 +1,2 @@
+[ { "result1": false }
+ ]

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-app/src/test/resources/runtimets/results/string/starts-with6/starts-with6.1.adm
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/runtimets/results/string/starts-with6/starts-with6.1.adm b/asterix-app/src/test/resources/runtimets/results/string/starts-with6/starts-with6.1.adm
new file mode 100644
index 0000000..9a8b5dc
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/results/string/starts-with6/starts-with6.1.adm
@@ -0,0 +1,2 @@
+[ [ false, true ]
+ ]

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-app/src/test/resources/runtimets/results/string/starts-with7/starts-with7.1.adm
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/runtimets/results/string/starts-with7/starts-with7.1.adm b/asterix-app/src/test/resources/runtimets/results/string/starts-with7/starts-with7.1.adm
new file mode 100644
index 0000000..eae1439
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/results/string/starts-with7/starts-with7.1.adm
@@ -0,0 +1,16 @@
+[ null
+, true
+, false
+, true
+, false
+, null 
+, true
+, true
+, false
+, true
+, false
+, true
+, true
+, false
+, false
+ ]

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-app/src/test/resources/runtimets/results/string/starts-with8/starts-with8.1.adm
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/runtimets/results/string/starts-with8/starts-with8.1.adm b/asterix-app/src/test/resources/runtimets/results/string/starts-with8/starts-with8.1.adm
new file mode 100644
index 0000000..3ff35f7
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/results/string/starts-with8/starts-with8.1.adm
@@ -0,0 +1,6 @@
+[ { "name": "John Doe" }
+, { "name": "John Smith" }
+, { "name": "John Wayne" }
+, { "name": "Johnny Walker" }
+, { "name": "Johnson Ben" }
+ ]

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-app/src/test/resources/runtimets/results/string/starts-with_01/starts-with_01.1.adm
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/runtimets/results/string/starts-with_01/starts-with_01.1.adm b/asterix-app/src/test/resources/runtimets/results/string/starts-with_01/starts-with_01.1.adm
deleted file mode 100644
index 9a8b5dc..0000000
--- a/asterix-app/src/test/resources/runtimets/results/string/starts-with_01/starts-with_01.1.adm
+++ /dev/null
@@ -1,2 +0,0 @@
-[ [ false, true ]
- ]

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-app/src/test/resources/runtimets/results/string/startwith02/startwith02.1.adm
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/runtimets/results/string/startwith02/startwith02.1.adm b/asterix-app/src/test/resources/runtimets/results/string/startwith02/startwith02.1.adm
deleted file mode 100644
index 933ea38..0000000
--- a/asterix-app/src/test/resources/runtimets/results/string/startwith02/startwith02.1.adm
+++ /dev/null
@@ -1,15 +0,0 @@
-[ true
-, false
-, true
-, false
-, false
-, true
-, true
-, false
-, true
-, false
-, true
-, true
-, false
-, false
- ]

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-app/src/test/resources/runtimets/results/string/startwith03/startwith03.1.adm
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/runtimets/results/string/startwith03/startwith03.1.adm b/asterix-app/src/test/resources/runtimets/results/string/startwith03/startwith03.1.adm
deleted file mode 100644
index 3ff35f7..0000000
--- a/asterix-app/src/test/resources/runtimets/results/string/startwith03/startwith03.1.adm
+++ /dev/null
@@ -1,6 +0,0 @@
-[ { "name": "John Doe" }
-, { "name": "John Smith" }
-, { "name": "John Wayne" }
-, { "name": "Johnny Walker" }
-, { "name": "Johnson Ben" }
- ]

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-app/src/test/resources/runtimets/results/string/string-equal4/string-equal4.1.adm
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/runtimets/results/string/string-equal4/string-equal4.1.adm b/asterix-app/src/test/resources/runtimets/results/string/string-equal4/string-equal4.1.adm
index 84836dc..07187b3 100644
--- a/asterix-app/src/test/resources/runtimets/results/string/string-equal4/string-equal4.1.adm
+++ b/asterix-app/src/test/resources/runtimets/results/string/string-equal4/string-equal4.1.adm
@@ -1,2 +1,2 @@
-[ { "result1": true, "result3": false, "result4": false, "result5": true }
+[ { "result1": true, "result3": null, "result4": null, "result5": null }
  ]

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-app/src/test/resources/runtimets/results/string/varlen-encoding/varlen-encoding.1.adm
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/runtimets/results/string/varlen-encoding/varlen-encoding.1.adm b/asterix-app/src/test/resources/runtimets/results/string/varlen-encoding/varlen-encoding.1.adm
new file mode 100644
index 0000000..12b6cef
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/results/string/varlen-encoding/varlen-encoding.1.adm
@@ -0,0 +1,2 @@
+[ [ 127, 128, 256, 1024, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216 ]
+ ]

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-app/src/test/resources/runtimets/testsuite.xml
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/runtimets/testsuite.xml b/asterix-app/src/test/resources/runtimets/testsuite.xml
index b6417bc..097805e 100644
--- a/asterix-app/src/test/resources/runtimets/testsuite.xml
+++ b/asterix-app/src/test/resources/runtimets/testsuite.xml
@@ -18,9 +18,9 @@
  !-->
 <!DOCTYPE test-suite [
 
-<!ENTITY RecordsQueries SYSTEM "queries/records/RecordsQueries.xml">
+        <!ENTITY RecordsQueries SYSTEM "queries/records/RecordsQueries.xml">
 
-]>
+        ]>
 <test-suite
         xmlns="urn:xml.testframework.asterix.apache.org"
         ResultOffsetPath="results"
@@ -4810,43 +4810,43 @@
             </compilation-unit>
         </test-case>
         <test-case FilePath="string">
-            <compilation-unit name="end-with1">
-                <output-dir compare="Text">end-with1</output-dir>
+            <compilation-unit name="ends-with1">
+                <output-dir compare="Text">ends-with1</output-dir>
             </compilation-unit>
         </test-case>
         <test-case FilePath="string">
-            <compilation-unit name="end-with2">
-                <output-dir compare="Text">end-with2</output-dir>
+            <compilation-unit name="ends-with2">
+                <output-dir compare="Text">ends-with2</output-dir>
             </compilation-unit>
         </test-case>
         <test-case FilePath="string">
-            <compilation-unit name="end-with3">
-                <output-dir compare="Text">end-with3</output-dir>
+            <compilation-unit name="ends-with3">
+                <output-dir compare="Text">ends-with3</output-dir>
             </compilation-unit>
         </test-case>
         <test-case FilePath="string">
-            <compilation-unit name="end-with4">
-                <output-dir compare="Text">end-with4</output-dir>
+            <compilation-unit name="ends-with4">
+                <output-dir compare="Text">ends-with4</output-dir>
             </compilation-unit>
         </test-case>
         <test-case FilePath="string">
-            <compilation-unit name="end-with5">
-                <output-dir compare="Text">end-with5</output-dir>
+            <compilation-unit name="ends-with5">
+                <output-dir compare="Text">ends-with5</output-dir>
             </compilation-unit>
         </test-case>
         <test-case FilePath="string">
-            <compilation-unit name="ends-with_01">
-                <output-dir compare="Text">ends-with_01</output-dir>
+            <compilation-unit name="ends-with6">
+                <output-dir compare="Text">ends-with6</output-dir>
             </compilation-unit>
         </test-case>
         <test-case FilePath="string">
-            <compilation-unit name="endwith02">
-                <output-dir compare="Text">endwith02</output-dir>
+            <compilation-unit name="ends-with7">
+                <output-dir compare="Text">ends-with7</output-dir>
             </compilation-unit>
         </test-case>
         <test-case FilePath="string">
-            <compilation-unit name="endwith03">
-                <output-dir compare="Text">endwith03</output-dir>
+            <compilation-unit name="ends-with8">
+                <output-dir compare="Text">ends-with8</output-dir>
             </compilation-unit>
         </test-case>
         <test-case FilePath="string">
@@ -4975,44 +4975,44 @@
             </compilation-unit>
         </test-case>
         <test-case FilePath="string">
-            <compilation-unit name="start-with1">
-                <output-dir compare="Text">start-with1</output-dir>
+            <compilation-unit name="starts-with1">
+                <output-dir compare="Text">starts-with1</output-dir>
             </compilation-unit>
         </test-case>
         <test-case FilePath="string">
-            <compilation-unit name="start-with2">
-                <output-dir compare="Text">start-with2</output-dir>
+            <compilation-unit name="starts-with2">
+                <output-dir compare="Text">starts-with2</output-dir>
             </compilation-unit>
         </test-case>
         <test-case FilePath="string">
-            <compilation-unit name="start-with3">
-                <output-dir compare="Text">start-with3</output-dir>
+            <compilation-unit name="starts-with3">
+                <output-dir compare="Text">starts-with3</output-dir>
             </compilation-unit>
         </test-case>
         <test-case FilePath="string">
-            <compilation-unit name="start-with4">
-                <output-dir compare="Text">start-with4</output-dir>
+            <compilation-unit name="starts-with4">
+                <output-dir compare="Text">starts-with4</output-dir>
             </compilation-unit>
         </test-case>
         <test-case FilePath="string">
-            <compilation-unit name="start-with5">
-                <output-dir compare="Text">start-with5</output-dir>
+            <compilation-unit name="starts-with5">
+                <output-dir compare="Text">starts-with5</output-dir>
             </compilation-unit>
         </test-case>
         <test-case FilePath="string">
-            <compilation-unit name="starts-with_01">
-                <output-dir compare="Text">starts-with_01</output-dir>
+            <compilation-unit name="starts-with6">
+                <output-dir compare="Text">starts-with6</output-dir>
             </compilation-unit>
         </test-case>
         <test-case FilePath="string">
-            <compilation-unit name="startwith02">
-                <output-dir compare="Text">startwith02</output-dir>
+            <compilation-unit name="starts-with7">
+                <output-dir compare="Text">starts-with7</output-dir>
             </compilation-unit>
         </test-case>
         <!--
         <test-case FilePath="string">
-          <compilation-unit name="startwith03">
-            <output-dir compare="Text">startwith03</output-dir>
+          <compilation-unit name="starts-with8">
+            <output-dir compare="Text">starts-with8</output-dir>
           </compilation-unit>
         </test-case>
         -->
@@ -5196,6 +5196,11 @@
                 <output-dir compare="Text">uppercase</output-dir>
             </compilation-unit>
         </test-case>
+        <test-case FilePath="string">
+            <compilation-unit name="varlen-encoding">
+                <output-dir compare="Text">varlen-encoding</output-dir>
+            </compilation-unit>
+        </test-case>
     </test-group>
     <test-group name="subset-collection">
         <test-case FilePath="subset-collection">
@@ -6206,6 +6211,11 @@
                 <output-dir compare="Text">big_object_join</output-dir>
             </compilation-unit>
         </test-case>
+        <test-case FilePath="big-object">
+            <compilation-unit name="big_object_load">
+                <output-dir compare="Text">big_object_load</output-dir>
+            </compilation-unit>
+        </test-case>
     </test-group>
     <test-group name="external-indexing">
         <test-case FilePath="external-indexing">

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-common/src/main/java/org/apache/asterix/common/utils/UTF8CharSequence.java
----------------------------------------------------------------------
diff --git a/asterix-common/src/main/java/org/apache/asterix/common/utils/UTF8CharSequence.java b/asterix-common/src/main/java/org/apache/asterix/common/utils/UTF8CharSequence.java
deleted file mode 100644
index 468aca8..0000000
--- a/asterix-common/src/main/java/org/apache/asterix/common/utils/UTF8CharSequence.java
+++ /dev/null
@@ -1,93 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.common.utils;
-
-import org.apache.hyracks.data.std.api.IValueReference;
-import org.apache.hyracks.data.std.primitive.UTF8StringPointable;
-
-public class UTF8CharSequence implements CharSequence {
-
-    private int start;
-    private int len;
-    private char[] buf;
-
-    public UTF8CharSequence(IValueReference valueRef, int start) {
-        reset(valueRef, start);
-    }
-
-    public UTF8CharSequence() {
-    }
-
-    @Override
-    public char charAt(int index) {
-        if (index >= len || index < 0) {
-            throw new IndexOutOfBoundsException("No index " + index + " for string of length " + len);
-        }
-        return buf[index];
-    }
-
-    @Override
-    public int length() {
-        return len;
-    }
-
-    @Override
-    public CharSequence subSequence(int start, int end) {
-        UTF8CharSequence carSeq = new UTF8CharSequence();
-        carSeq.len = end - start;
-        if (end != start) {
-            carSeq.buf = new char[carSeq.len];
-            System.arraycopy(buf, start, carSeq.buf, 0, carSeq.len);
-        }
-        return carSeq;
-    }
-
-    public void reset(IValueReference valueRef, int start) {
-        this.start = start;
-        resetLength(valueRef);
-        if (buf == null || buf.length < len) {
-            buf = new char[len];
-        }
-        int sStart = start + 2;
-        int c = 0;
-        int i = 0;
-        byte[] bytes = valueRef.getByteArray();
-        while (c < len) {
-            buf[i++] = UTF8StringPointable.charAt(bytes, sStart + c);
-            c += UTF8StringPointable.charSize(bytes, sStart + c);
-        }
-
-    }
-
-    private void resetLength(IValueReference valueRef) {
-        this.len = UTF8StringPointable.getUTFLength(valueRef.getByteArray(), start);
-    }
-
-    @Override
-    public String toString() {
-        StringBuffer bf = new StringBuffer();
-        if (buf != null) {
-            for (int i = 0; i < buf.length; i++) {
-                bf.append(buf[i]);
-            }
-        }
-        return new String(bf);
-    }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-common/src/test/java/org/apache/asterix/test/aql/TestsUtils.java
----------------------------------------------------------------------
diff --git a/asterix-common/src/test/java/org/apache/asterix/test/aql/TestsUtils.java b/asterix-common/src/test/java/org/apache/asterix/test/aql/TestsUtils.java
index 8ff524e..6cc7abb 100644
--- a/asterix-common/src/test/java/org/apache/asterix/test/aql/TestsUtils.java
+++ b/asterix-common/src/test/java/org/apache/asterix/test/aql/TestsUtils.java
@@ -35,7 +35,18 @@ import java.util.Set;
 import java.util.logging.Level;
 import java.util.logging.Logger;
 
-import org.apache.commons.httpclient.*;
+import org.apache.asterix.common.config.GlobalConfig;
+import org.apache.asterix.testframework.context.TestCaseContext;
+import org.apache.asterix.testframework.context.TestCaseContext.OutputFormat;
+import org.apache.asterix.testframework.context.TestFileContext;
+import org.apache.asterix.testframework.xml.TestCase.CompilationUnit;
+import org.apache.asterix.testframework.xml.TestGroup;
+import org.apache.commons.httpclient.DefaultHttpMethodRetryHandler;
+import org.apache.commons.httpclient.HttpClient;
+import org.apache.commons.httpclient.HttpMethod;
+import org.apache.commons.httpclient.HttpMethodBase;
+import org.apache.commons.httpclient.HttpStatus;
+import org.apache.commons.httpclient.NameValuePair;
 import org.apache.commons.httpclient.methods.GetMethod;
 import org.apache.commons.httpclient.methods.PostMethod;
 import org.apache.commons.httpclient.methods.StringRequestEntity;
@@ -43,12 +54,6 @@ import org.apache.commons.httpclient.params.HttpMethodParams;
 import org.apache.commons.io.IOUtils;
 import org.json.JSONObject;
 
-import org.apache.asterix.common.config.GlobalConfig;
-import org.apache.asterix.testframework.context.TestCaseContext;
-import org.apache.asterix.testframework.context.TestCaseContext.OutputFormat;
-import org.apache.asterix.testframework.context.TestFileContext;
-import org.apache.asterix.testframework.xml.TestCase.CompilationUnit;
-
 public class TestsUtils {
 
     private static final Logger LOGGER = Logger.getLogger(TestsUtils.class.getName());
@@ -383,6 +388,11 @@ public class TestsUtils {
 
     public static void executeTest(String actualPath, TestCaseContext testCaseCtx, ProcessBuilder pb,
             boolean isDmlRecoveryTest) throws Exception {
+        executeTest(actualPath, testCaseCtx, pb, isDmlRecoveryTest, null);
+    }
+
+    public static void executeTest(String actualPath, TestCaseContext testCaseCtx, ProcessBuilder pb,
+            boolean isDmlRecoveryTest, TestGroup failedGroup) throws Exception {
 
         File testFile;
         File expectedResultFile;
@@ -395,7 +405,8 @@ public class TestsUtils {
 
         List<CompilationUnit> cUnits = testCaseCtx.getTestCase().getCompilationUnit();
         for (CompilationUnit cUnit : cUnits) {
-            LOGGER.info("Starting [TEST]: " + testCaseCtx.getTestCase().getFilePath() + "/" + cUnit.getName() + " ... ");
+            LOGGER.info(
+                    "Starting [TEST]: " + testCaseCtx.getTestCase().getFilePath() + "/" + cUnit.getName() + " ... ");
             testFileCtxs = testCaseCtx.getTestFiles(cUnit);
             expectedResultFileCtxs = testCaseCtx.getExpectedResultFiles(cUnit);
             for (TestFileContext ctx : testFileCtxs) {
@@ -524,16 +535,22 @@ public class TestsUtils {
                     }
 
                 } catch (Exception e) {
+
                     System.err.println("testFile " + testFile.toString() + " raised an exception:");
+
                     e.printStackTrace();
                     if (cUnit.getExpectedError().isEmpty()) {
                         System.err.println("...Unexpected!");
+                        if (failedGroup != null) {
+                            failedGroup.getTestCase().add(testCaseCtx.getTestCase());
+                        }
                         throw new Exception("Test \"" + testFile + "\" FAILED!", e);
                     } else {
                         LOGGER.info("[TEST]: " + testCaseCtx.getTestCase().getFilePath() + "/" + cUnit.getName()
                                 + " failed as expected: " + e.getMessage());
                         System.err.println("...but that was expected.");
                     }
+
                 }
             }
         }

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/FileIndexTupleTranslator.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/FileIndexTupleTranslator.java b/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/FileIndexTupleTranslator.java
index 8029b8f..ebba65b 100644
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/FileIndexTupleTranslator.java
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/FileIndexTupleTranslator.java
@@ -41,7 +41,8 @@ import org.apache.hyracks.dataflow.common.data.accessors.ITupleReference;
 
 @SuppressWarnings("unchecked")
 public class FileIndexTupleTranslator {
-    private ArrayTupleBuilder tupleBuilder = new ArrayTupleBuilder(FilesIndexDescription.FILE_INDEX_RECORD_DESCRIPTOR.getFieldCount());
+    private final FilesIndexDescription filesIndexDescription = new FilesIndexDescription();
+    private ArrayTupleBuilder tupleBuilder = new ArrayTupleBuilder(filesIndexDescription.FILE_INDEX_RECORD_DESCRIPTOR.getFieldCount());
     private RecordBuilder recordBuilder = new RecordBuilder();
     private ArrayBackedValueStorage fieldValue = new ArrayBackedValueStorage();
     private AMutableInt32 aInt32 = new AMutableInt32(0);
@@ -57,11 +58,11 @@ public class FileIndexTupleTranslator {
         tupleBuilder.reset();
         //File Number
         aInt32.setValue(file.getFileNumber());
-        FilesIndexDescription.FILE_INDEX_RECORD_DESCRIPTOR.getFields()[0].serialize(aInt32, tupleBuilder.getDataOutput());
+        filesIndexDescription.FILE_INDEX_RECORD_DESCRIPTOR.getFields()[0].serialize(aInt32, tupleBuilder.getDataOutput());
         tupleBuilder.addFieldEndOffset();
         
         //File Record
-        recordBuilder.reset(FilesIndexDescription.EXTERNAL_FILE_RECORD_TYPE);
+        recordBuilder.reset(filesIndexDescription.EXTERNAL_FILE_RECORD_TYPE);
         // write field 0 (File Name)
         fieldValue.reset();
         aString.setValue(file.getFileName());

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/HiveObjectParser.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/HiveObjectParser.java b/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/HiveObjectParser.java
index e80247f..d7fa4f2 100644
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/HiveObjectParser.java
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/HiveObjectParser.java
@@ -58,6 +58,7 @@ import org.apache.asterix.om.util.NonTaggedFormatUtil;
 import org.apache.hyracks.algebricks.common.exceptions.NotImplementedException;
 import org.apache.hyracks.api.exceptions.HyracksDataException;
 import org.apache.hyracks.data.std.util.ArrayBackedValueStorage;
+import org.apache.hyracks.util.string.UTF8StringWriter;
 
 @SuppressWarnings("deprecation")
 public class HiveObjectParser implements IAsterixHDFSRecordParser {
@@ -75,6 +76,7 @@ public class HiveObjectParser implements IAsterixHDFSRecordParser {
     private UnorderedListBuilder unorderedListBuilder;
     private boolean initialized = false;
     private List<StructField> fieldRefs;
+    private UTF8StringWriter utf8Writer = new UTF8StringWriter();
 
     @SuppressWarnings({ "unchecked" })
     @Override
@@ -308,7 +310,7 @@ public class HiveObjectParser implements IAsterixHDFSRecordParser {
     }
 
     private void parseString(Object obj, StringObjectInspector foi, DataOutput dataOutput) throws IOException {
-        dataOutput.writeUTF(foi.getPrimitiveJavaObject(obj));
+        utf8Writer.writeUTF8(foi.getPrimitiveJavaObject(obj), dataOutput);
     }
 
     private void parseTime(Object obj, TimestampObjectInspector foi, DataOutput dataOutput) throws IOException {

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/operators/ExternalIndexBulkModifyOperatorNodePushable.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/operators/ExternalIndexBulkModifyOperatorNodePushable.java b/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/operators/ExternalIndexBulkModifyOperatorNodePushable.java
index 0aa761c..d8865b4 100644
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/operators/ExternalIndexBulkModifyOperatorNodePushable.java
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/operators/ExternalIndexBulkModifyOperatorNodePushable.java
@@ -36,9 +36,10 @@ import org.apache.hyracks.storage.am.lsm.common.api.ITwoPCIndex;
 
 public class ExternalIndexBulkModifyOperatorNodePushable extends IndexBulkLoadOperatorNodePushable {
 
+    private final FilesIndexDescription filesIndexDescription = new FilesIndexDescription();
     private final int[] deletedFiles;
     private ArrayTupleBuilder buddyBTreeTupleBuilder = new ArrayTupleBuilder(
-            FilesIndexDescription.FILE_BUDDY_BTREE_RECORD_DESCRIPTOR.getFieldCount());
+            filesIndexDescription.FILE_BUDDY_BTREE_RECORD_DESCRIPTOR.getFieldCount());
     private AMutableInt32 fileNumber = new AMutableInt32(0);
     private ArrayTupleReference deleteTuple = new ArrayTupleReference();
 
@@ -65,7 +66,7 @@ public class ExternalIndexBulkModifyOperatorNodePushable extends IndexBulkLoadOp
             // Delete files
             for (int i = 0; i < deletedFiles.length; i++) {
                 fileNumber.setValue(deletedFiles[i]);
-                FilesIndexDescription.getBuddyBTreeTupleFromFileNumber(deleteTuple, buddyBTreeTupleBuilder, fileNumber);
+                filesIndexDescription.getBuddyBTreeTupleFromFileNumber(deleteTuple, buddyBTreeTupleBuilder, fileNumber);
                 ((ITwoPCIndexBulkLoader) bulkLoader).delete(deleteTuple);
             }
         } catch (Exception e) {

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-external-data/src/main/java/org/apache/asterix/external/library/java/JObjectAccessors.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/library/java/JObjectAccessors.java b/asterix-external-data/src/main/java/org/apache/asterix/external/library/java/JObjectAccessors.java
index 84e3d38..6010e54 100644
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/library/java/JObjectAccessors.java
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/library/java/JObjectAccessors.java
@@ -226,27 +226,22 @@ public class JObjectAccessors {
     }
 
     public static class JStringAccessor implements IJObjectAccessor {
-        private final ByteArrayAccessibleOutputStream baaos = new ByteArrayAccessibleOutputStream();
+        private final AStringSerializerDeserializer aStringSerDer = new AStringSerializerDeserializer();
 
         @Override
         public IJObject access(IVisitablePointable pointable, IObjectPool<IJObject, IAType> objectPool)
                 throws HyracksDataException {
-            IJObject jObject = objectPool.allocate(BuiltinType.ASTRING);
+            byte[] b = pointable.getByteArray();
+            int s = pointable.getStartOffset();
+            int l = pointable.getLength();
 
-            try {
-                byte byteArray[] = pointable.getByteArray();
-                int len = pointable.getLength()-3;
-                int off = pointable.getStartOffset()+3;
-                baaos.reset();
-                if(off >= 0 && off <= byteArray.length && len >= 0 && off + len - byteArray.length <= 0) {
-                    baaos.write(byteArray, off, len);
-                    ((JString) jObject).setValue(JObjectUtil.getNormalizedString(baaos.toString("UTF-8")));
-                } else {
-                    ((JString) jObject).setValue("");
-                }
-            } catch (IOException e) {
-                e.printStackTrace();
-            }
+            String v = null;
+            v = aStringSerDer.deserialize(
+                    new DataInputStream(new ByteArrayInputStream(b, s + 1, l - 1))).getStringValue();
+            JObjectUtil.getNormalizedString(v);
+
+            IJObject jObject = objectPool.allocate(BuiltinType.ASTRING);
+            ((JString) jObject).setValue(JObjectUtil.getNormalizedString(v));
             return jObject;
         }
     }
@@ -449,6 +444,7 @@ public class JObjectAccessors {
         private final JRecord jRecord;
         private final IJObject[] jObjects;
         private final LinkedHashMap<String, IJObject> openFields;
+        private final AStringSerializerDeserializer aStringSerDer = new AStringSerializerDeserializer();
 
         public JRecordAccessor(ARecordType recordType, IObjectPool<IJObject, IAType> objectPool) {
             this.typeInfo = new TypeInfo(objectPool, null, null);
@@ -491,7 +487,8 @@ public class JObjectAccessors {
                                 // value is null
                                 fieldObject = null;
                             } else {
-                                fieldObject = pointableVisitor.visit((AListVisitablePointable) fieldPointable, typeInfo);
+                                fieldObject = pointableVisitor
+                                        .visit((AListVisitablePointable) fieldPointable, typeInfo);
                             }
                             break;
                         case ANY:
@@ -505,7 +502,7 @@ public class JObjectAccessors {
                         byte[] b = fieldName.getByteArray();
                         int s = fieldName.getStartOffset();
                         int l = fieldName.getLength();
-                        String v = AStringSerializerDeserializer.INSTANCE.deserialize(
+                        String v = aStringSerDer.deserialize(
                                 new DataInputStream(new ByteArrayInputStream(b, s + 1, l - 1))).getStringValue();
                         openFields.put(v, fieldObject);
                     }
@@ -540,7 +537,8 @@ public class JObjectAccessors {
         }
 
         @Override
-        public IJObject access(AListVisitablePointable pointable, IObjectPool<IJObject, IAType> objectPool, IAType listType,
+        public IJObject access(AListVisitablePointable pointable, IObjectPool<IJObject, IAType> objectPool,
+                IAType listType,
                 JObjectPointableVisitor pointableVisitor) throws HyracksDataException {
             List<IVisitablePointable> items = pointable.getItems();
             List<IVisitablePointable> itemTags = pointable.getItemTags();

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-external-data/src/main/java/org/apache/asterix/external/library/java/JObjectUtil.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/library/java/JObjectUtil.java b/asterix-external-data/src/main/java/org/apache/asterix/external/library/java/JObjectUtil.java
index 5bf87bc..a0710ff 100644
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/library/java/JObjectUtil.java
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/library/java/JObjectUtil.java
@@ -392,7 +392,7 @@ public class JObjectUtil {
                         dis.readInt();
                     }
                     for (int i = 0; i < numberOfOpenFields; i++) {
-                        fieldNames[i] = AStringSerializerDeserializer.INSTANCE.deserialize(dis).getStringValue();
+                        fieldNames[i] = new AStringSerializerDeserializer().deserialize(dis).getStringValue();
                         ATypeTag openFieldTypeTag = SerializerDeserializerUtil.deserializeTag(dis);
                         openFields[i] = getJType(openFieldTypeTag, null, dis, objectPool);
                         fieldTypes[i] = openFields[i].getIAObject().getType();

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-external-data/src/main/java/org/apache/asterix/external/library/java/JObjects.java
----------------------------------------------------------------------
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/library/java/JObjects.java b/asterix-external-data/src/main/java/org/apache/asterix/external/library/java/JObjects.java
index c3843e1..02f7b4b 100644
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/library/java/JObjects.java
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/library/java/JObjects.java
@@ -367,6 +367,8 @@ public class JObjects {
 
     public static final class JString extends JObject {
 
+        private final AStringSerializerDeserializer aStringSerDer = new AStringSerializerDeserializer();
+
         public JString(String v) {
             super(new AMutableString(v));
         }
@@ -388,7 +390,7 @@ public class JObjects {
                     throw new HyracksDataException(e);
                 }
             }
-            AStringSerializerDeserializer.INSTANCE.serialize((AString) value, dataOutput);
+            aStringSerDer.serialize((AString) value, dataOutput);
         }
 
         @Override
@@ -976,6 +978,8 @@ public class JObjects {
         private ARecordType recordType;
         private IJObject[] fields;
         private Map<String, IJObject> openFields;
+        private final AStringSerializerDeserializer aStringSerDer = new AStringSerializerDeserializer();
+
 
         public JRecord(ARecordType recordType, IJObject[] fields) {
             this.recordType = recordType;
@@ -1104,7 +1108,7 @@ public class JObjects {
                         openFieldValue.reset();
                         nameValue.setValue(entry.getKey());
                         openFieldName.getDataOutput().write(ATypeTag.STRING.serialize());
-                        AStringSerializerDeserializer.INSTANCE.serialize(nameValue, openFieldName.getDataOutput());
+                        aStringSerDer.serialize(nameValue, openFieldName.getDataOutput());
                         entry.getValue().serialize(openFieldValue.getDataOutput(), true);
                         recordBuilder.addField(openFieldName, openFieldValue);
                     }

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-fuzzyjoin/pom.xml
----------------------------------------------------------------------
diff --git a/asterix-fuzzyjoin/pom.xml b/asterix-fuzzyjoin/pom.xml
index a96c926..f95bcc1 100644
--- a/asterix-fuzzyjoin/pom.xml
+++ b/asterix-fuzzyjoin/pom.xml
@@ -66,6 +66,14 @@
         <groupId>org.apache.hyracks</groupId>
         <artifactId>hyracks-api</artifactId>
     </dependency>
+    <dependency>
+      <groupId>org.apache.hyracks</groupId>
+      <artifactId>hyracks-util</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hyracks</groupId>
+      <artifactId>hyracks-storage-am-lsm-invertedindex</artifactId>
+    </dependency>
   </dependencies>
 
 </project>

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/similarity/SimilarityMetricEditDistance.java
----------------------------------------------------------------------
diff --git a/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/similarity/SimilarityMetricEditDistance.java b/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/similarity/SimilarityMetricEditDistance.java
index 8b6b23d..ea57fd2 100644
--- a/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/similarity/SimilarityMetricEditDistance.java
+++ b/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/similarity/SimilarityMetricEditDistance.java
@@ -21,13 +21,11 @@ package org.apache.asterix.fuzzyjoin.similarity;
 
 import java.util.Arrays;
 
-import org.apache.asterix.fuzzyjoin.tokenizer.StringUtils;
 import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.util.string.UTF8StringUtil;
 
 public class SimilarityMetricEditDistance implements IGenericSimilarityMetric {
 
-    private final int utf8SizeIndicatorSize = 2;
-
     // dp implementation only needs 2 rows
     private final int rows = 2;
     private int cols;
@@ -159,8 +157,13 @@ public class SimilarityMetricEditDistance implements IGenericSimilarityMetric {
     // faster implementation for common case of string edit distance
     public int UTF8StringEditDistance(byte[] bytes, int fsStart, int ssStart) {
 
-        int fsLen = StringUtils.getStrLen(bytes, fsStart);
-        int ssLen = StringUtils.getStrLen(bytes, ssStart);
+        int fsLen = UTF8StringUtil.getStringLength(bytes, fsStart);
+        int ssLen = UTF8StringUtil.getStringLength(bytes, ssStart);
+
+        int fsUtfLen = UTF8StringUtil.getUTFLength(bytes, fsStart);
+        int ssUtfLen = UTF8StringUtil.getUTFLength(bytes, ssStart);
+        int fsMetaLen = UTF8StringUtil.getNumBytesToStoreLength(fsUtfLen);
+        int ssMetaLen = UTF8StringUtil.getNumBytesToStoreLength(ssUtfLen);
 
         // reuse existing matrix if possible
         if (ssLen >= cols) {
@@ -168,8 +171,8 @@ public class SimilarityMetricEditDistance implements IGenericSimilarityMetric {
             matrix = new int[rows][cols];
         }
 
-        int fsDataStart = fsStart + utf8SizeIndicatorSize;
-        int ssDataStart = ssStart + utf8SizeIndicatorSize;
+        int fsDataStart = fsStart + fsMetaLen;
+        int ssDataStart = ssStart + ssMetaLen;
 
         // init matrix
         for (int i = 0; i <= ssLen; i++) {
@@ -183,19 +186,19 @@ public class SimilarityMetricEditDistance implements IGenericSimilarityMetric {
         int fsPos = fsDataStart;
         for (int i = 1; i <= fsLen; i++) {
             matrix[currRow][0] = i;
-            char fsChar = StringUtils.toLowerCase(StringUtils.charAt(bytes, fsPos));
+            char fsChar = Character.toLowerCase(UTF8StringUtil.charAt(bytes, fsPos));
 
             int ssPos = ssDataStart;
             for (int j = 1; j <= ssLen; j++) {
-                char ssChar = StringUtils.toLowerCase(StringUtils.charAt(bytes, ssPos));
+                char ssChar = Character.toLowerCase(UTF8StringUtil.charAt(bytes, ssPos));
 
                 matrix[currRow][j] = Math.min(Math.min(matrix[prevRow][j] + 1, matrix[currRow][j - 1] + 1),
                         matrix[prevRow][j - 1] + (fsChar == ssChar ? 0 : 1));
 
-                ssPos += StringUtils.charSize(bytes, ssPos);
+                ssPos += UTF8StringUtil.charSize(bytes, ssPos);
             }
 
-            fsPos += StringUtils.charSize(bytes, fsPos);
+            fsPos += UTF8StringUtil.charSize(bytes, fsPos);
 
             int tmp = currRow;
             currRow = prevRow;
@@ -207,8 +210,13 @@ public class SimilarityMetricEditDistance implements IGenericSimilarityMetric {
 
     public int UTF8StringEditDistance(byte[] bytes, int fsStart, int ssStart, int edThresh) {
 
-        int fsStrLen = StringUtils.getStrLen(bytes, fsStart);
-        int ssStrLen = StringUtils.getStrLen(bytes, ssStart);
+        int fsStrLen = UTF8StringUtil.getStringLength(bytes, fsStart);
+        int ssStrLen = UTF8StringUtil.getStringLength(bytes, ssStart);
+
+        int fsUtfLen = UTF8StringUtil.getUTFLength(bytes, fsStart);
+        int ssUtfLen = UTF8StringUtil.getUTFLength(bytes, ssStart);
+        int fsMetaLen = UTF8StringUtil.getNumBytesToStoreLength(fsUtfLen);
+        int ssMetaLen = UTF8StringUtil.getNumBytesToStoreLength(ssUtfLen);
 
         // length filter
         if (Math.abs(fsStrLen - ssStrLen) > edThresh) {
@@ -220,25 +228,25 @@ public class SimilarityMetricEditDistance implements IGenericSimilarityMetric {
         Arrays.fill(ssLcCount, 0);
 
         // compute letter counts for first string
-        int fsPos = fsStart + utf8SizeIndicatorSize;
-        int fsEnd = fsPos + StringUtils.getUTFLen(bytes, fsStart);;
+        int fsPos = fsStart + fsMetaLen;
+        int fsEnd = fsPos + fsUtfLen;;
         while (fsPos < fsEnd) {
-            char c = StringUtils.toLowerCase(StringUtils.charAt(bytes, fsPos));
+            char c = Character.toLowerCase(UTF8StringUtil.charAt(bytes, fsPos));
             if (c < 128) {
                 fsLcCount[c]++;
             }
-            fsPos += StringUtils.charSize(bytes, fsPos);
+            fsPos += UTF8StringUtil.charSize(bytes, fsPos);
         }
 
         // compute letter counts for second string
-        int ssPos = ssStart + utf8SizeIndicatorSize;
-        int ssEnd = ssPos + StringUtils.getUTFLen(bytes, ssStart);
+        int ssPos = ssStart + ssMetaLen;
+        int ssEnd = ssPos + ssUtfLen;
         while (ssPos < ssEnd) {
-            char c = StringUtils.toLowerCase(StringUtils.charAt(bytes, ssPos));
+            char c = Character.toLowerCase(UTF8StringUtil.charAt(bytes, ssPos));
             if (c < 128) {
                 ssLcCount[c]++;
             }
-            ssPos += StringUtils.charSize(bytes, ssPos);
+            ssPos += UTF8StringUtil.charSize(bytes, ssPos);
         }
 
         // apply filter
@@ -269,8 +277,14 @@ public class SimilarityMetricEditDistance implements IGenericSimilarityMetric {
     // checks whether the first string contains a similar string to the second string
     public int UTF8StringEditDistanceContains(byte[] bytes, int stringStart, int patternStart, int edThresh) {
 
-        int stringLen = StringUtils.getStrLen(bytes, stringStart);
-        int patternLen = StringUtils.getStrLen(bytes, patternStart);
+        int stringLen = UTF8StringUtil.getStringLength(bytes, stringStart);
+        int patternLen = UTF8StringUtil.getStringLength(bytes, patternStart);
+
+        int stringUTFLen = UTF8StringUtil.getUTFLength(bytes, stringStart);
+        int stringMetaLen = UTF8StringUtil.getNumBytesToStoreLength(stringUTFLen);
+
+        int patternUTFLen = UTF8StringUtil.getUTFLength(bytes, patternStart);
+        int patternMetaLen = UTF8StringUtil.getNumBytesToStoreLength(patternUTFLen);
 
         // reuse existing matrix if possible
         if (patternLen >= cols) {
@@ -278,8 +292,8 @@ public class SimilarityMetricEditDistance implements IGenericSimilarityMetric {
             matrix = new int[rows][cols];
         }
 
-        int stringDataStart = stringStart + utf8SizeIndicatorSize;
-        int patternDataStart = patternStart + utf8SizeIndicatorSize;
+        int stringDataStart = stringStart +  stringMetaLen;
+        int patternDataStart = patternStart + patternMetaLen;
 
         // init matrix
         for (int i = 0; i <= patternLen; i++) {
@@ -293,23 +307,23 @@ public class SimilarityMetricEditDistance implements IGenericSimilarityMetric {
         int stringPos = stringDataStart;
         for (int i = 1; i <= stringLen; i++) {
             matrix[currRow][0] = 0;
-            char stringChar = StringUtils.toLowerCase(StringUtils.charAt(bytes, stringPos));
+            char stringChar = Character.toLowerCase(UTF8StringUtil.charAt(bytes, stringPos));
 
             int patternPos = patternDataStart;
             for (int j = 1; j <= patternLen; j++) {
-                char patternChar = StringUtils.toLowerCase(StringUtils.charAt(bytes, patternPos));
+                char patternChar = Character.toLowerCase(UTF8StringUtil.charAt(bytes, patternPos));
 
                 matrix[currRow][j] = Math.min(Math.min(matrix[prevRow][j] + 1, matrix[currRow][j - 1] + 1),
                         matrix[prevRow][j - 1] + (stringChar == patternChar ? 0 : 1));
 
-                patternPos += StringUtils.charSize(bytes, patternPos);
+                patternPos += UTF8StringUtil.charSize(bytes, patternPos);
 
                 if (j == patternLen && matrix[currRow][patternLen] < minEd) {
                     minEd = matrix[currRow][patternLen];
                 }
             }
 
-            stringPos += StringUtils.charSize(bytes, stringPos);
+            stringPos += UTF8StringUtil.charSize(bytes, stringPos);
 
             int tmp = currRow;
             currRow = prevRow;

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/tokenizer/AbstractUTF8StringBinaryTokenizer.java
----------------------------------------------------------------------
diff --git a/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/tokenizer/AbstractUTF8StringBinaryTokenizer.java b/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/tokenizer/AbstractUTF8StringBinaryTokenizer.java
deleted file mode 100644
index fe90e05..0000000
--- a/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/tokenizer/AbstractUTF8StringBinaryTokenizer.java
+++ /dev/null
@@ -1,77 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.asterix.fuzzyjoin.tokenizer;
-
-import org.apache.asterix.fuzzyjoin.IntArray;
-
-public abstract class AbstractUTF8StringBinaryTokenizer implements IBinaryTokenizer {
-
-    protected byte[] data;
-    protected int start;
-    protected int length;
-    protected int tokenLength;
-    protected int index;
-    protected int utf8Length;
-
-    protected final IntArray tokensStart;
-    protected final IntArray tokensLength;
-    protected final IToken token;
-
-    protected final boolean ignoreTokenCount;
-    protected final boolean sourceHasTypeTag;
-
-    public AbstractUTF8StringBinaryTokenizer(boolean ignoreTokenCount, boolean sourceHasTypeTag,
-            ITokenFactory tokenFactory) {
-        this.ignoreTokenCount = ignoreTokenCount;
-        this.sourceHasTypeTag = sourceHasTypeTag;
-        if (!ignoreTokenCount) {
-            tokensStart = new IntArray();
-            tokensLength = new IntArray();
-        } else {
-            tokensStart = null;
-            tokensLength = null;
-        }
-        token = tokenFactory.createToken();
-    }
-
-    @Override
-    public IToken getToken() {
-        return token;
-    }
-
-    @Override
-    public void reset(byte[] data, int start, int length) {
-        this.start = start;
-        index = this.start;
-        if (sourceHasTypeTag) {
-            index++; // skip type tag
-        }
-        utf8Length = StringUtils.getUTFLen(data, index);
-        index += 2; // skip utf8 length indicator
-        this.data = data;
-        this.length = length + start;
-
-        tokenLength = 0;
-        if (!ignoreTokenCount) {
-            tokensStart.reset();
-            tokensLength.reset();
-        }
-    }
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/tokenizer/AbstractUTF8Token.java
----------------------------------------------------------------------
diff --git a/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/tokenizer/AbstractUTF8Token.java b/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/tokenizer/AbstractUTF8Token.java
deleted file mode 100644
index 835d591..0000000
--- a/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/tokenizer/AbstractUTF8Token.java
+++ /dev/null
@@ -1,103 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.asterix.fuzzyjoin.tokenizer;
-
-import java.io.DataOutput;
-import java.io.IOException;
-
-public abstract class AbstractUTF8Token implements IToken {
-    public static final int GOLDEN_RATIO_32 = 0x09e3779b9;
-
-    protected int length;
-    protected int tokenLength;
-    protected int start;
-    protected int tokenCount;
-    protected byte[] data;
-    protected final byte tokenTypeTag;
-    protected final byte countTypeTag;
-
-    public AbstractUTF8Token() {
-        tokenTypeTag = -1;
-        countTypeTag = -1;
-    }
-
-    public AbstractUTF8Token(byte tokenTypeTag, byte countTypeTag) {
-        this.tokenTypeTag = tokenTypeTag;
-        this.countTypeTag = countTypeTag;
-    }
-
-    @Override
-    public byte[] getData() {
-        return data;
-    }
-
-    @Override
-    public int getLength() {
-        return length;
-    }
-
-    public int getLowerCaseUTF8Len(int size) {
-        int lowerCaseUTF8Len = 0;
-        int pos = start;
-        for (int i = 0; i < size; i++) {
-            char c = StringUtils.toLowerCase(StringUtils.charAt(data, pos));
-            lowerCaseUTF8Len += StringUtils.getModifiedUTF8Len(c);
-            pos += StringUtils.charSize(data, pos);
-        }
-        return lowerCaseUTF8Len;
-    }
-
-    @Override
-    public int getStart() {
-        return start;
-    }
-
-    @Override
-    public int getTokenLength() {
-        return tokenLength;
-    }
-
-    public void handleCountTypeTag(DataOutput dos) throws IOException {
-        if (countTypeTag > 0) {
-            dos.write(countTypeTag);
-        }
-    }
-
-    public void handleTokenTypeTag(DataOutput dos) throws IOException {
-        if (tokenTypeTag > 0) {
-            dos.write(tokenTypeTag);
-        }
-    }
-
-    @Override
-    public void reset(byte[] data, int start, int length, int tokenLength, int tokenCount) {
-        this.data = data;
-        this.start = start;
-        this.length = length;
-        this.tokenLength = tokenLength;
-        this.tokenCount = tokenCount;
-    }
-
-    @Override
-    public void serializeTokenCount(DataOutput dos) throws IOException {
-        handleCountTypeTag(dos);
-        dos.writeInt(tokenCount);
-    }
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/tokenizer/AbstractUTF8TokenFactory.java
----------------------------------------------------------------------
diff --git a/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/tokenizer/AbstractUTF8TokenFactory.java b/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/tokenizer/AbstractUTF8TokenFactory.java
deleted file mode 100644
index 849bfd1..0000000
--- a/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/tokenizer/AbstractUTF8TokenFactory.java
+++ /dev/null
@@ -1,36 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.asterix.fuzzyjoin.tokenizer;
-
-public abstract class AbstractUTF8TokenFactory implements ITokenFactory {
-    private static final long serialVersionUID = 1L;
-    protected final byte tokenTypeTag;
-    protected final byte countTypeTag;
-
-    public AbstractUTF8TokenFactory() {
-        tokenTypeTag = -1;
-        countTypeTag = -1;
-    }
-
-    public AbstractUTF8TokenFactory(byte tokenTypeTag, byte countTypeTag) {
-        this.tokenTypeTag = tokenTypeTag;
-        this.countTypeTag = countTypeTag;
-    }
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/tokenizer/DelimitedUTF8StringBinaryTokenizer.java
----------------------------------------------------------------------
diff --git a/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/tokenizer/DelimitedUTF8StringBinaryTokenizer.java b/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/tokenizer/DelimitedUTF8StringBinaryTokenizer.java
deleted file mode 100644
index 4b11026..0000000
--- a/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/tokenizer/DelimitedUTF8StringBinaryTokenizer.java
+++ /dev/null
@@ -1,79 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.asterix.fuzzyjoin.tokenizer;
-
-public class DelimitedUTF8StringBinaryTokenizer extends AbstractUTF8StringBinaryTokenizer {
-
-    public DelimitedUTF8StringBinaryTokenizer(boolean ignoreTokenCount, boolean sourceHasTypeTag,
-            ITokenFactory tokenFactory) {
-        super(ignoreTokenCount, sourceHasTypeTag, tokenFactory);
-    }
-
-    @Override
-    public boolean hasNext() {
-        // skip delimiters
-        while (index < length && isSeparator(StringUtils.charAt(data, index))) {
-            index += StringUtils.charSize(data, index);
-        }
-        return index < length;
-    }
-
-    private boolean isSeparator(char c) {
-        return !(Character.isLetterOrDigit(c) || Character.getType(c) == Character.OTHER_LETTER || Character.getType(c) == Character.OTHER_NUMBER);
-    }
-
-    @Override
-    public void next() {
-        tokenLength = 0;
-        int currentTokenStart = index;
-        while (index < length && !isSeparator(StringUtils.charAt(data, index))) {
-            index += StringUtils.charSize(data, index);
-            tokenLength++;
-        }
-        int tokenCount = 1;
-        if (tokenLength > 0 && !ignoreTokenCount) {
-            // search if we got the same token before
-            for (int i = 0; i < tokensStart.length(); ++i) {
-                if (tokenLength == tokensLength.get(i)) {
-                    int tokenStart = tokensStart.get(i);
-                    tokenCount++; // assume we found it
-                    int offset = 0;
-                    int currLength = 0;
-                    while (currLength < tokenLength) {
-                        // case insensitive comparison
-                        if (StringUtils.toLowerCase(StringUtils.charAt(data, currentTokenStart + offset)) != StringUtils
-                                .toLowerCase(StringUtils.charAt(data, tokenStart + offset))) {
-                            tokenCount--;
-                            break;
-                        }
-                        offset += StringUtils.charSize(data, currentTokenStart + offset);
-                        currLength++;
-                    }
-                }
-            }
-            // add the new token to the list of seen tokens
-            tokensStart.add(currentTokenStart);
-            tokensLength.add(tokenLength);
-        }
-
-        // set token
-        token.reset(data, currentTokenStart, index, tokenLength, tokenCount);
-    }
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/tokenizer/DelimitedUTF8StringBinaryTokenizerFactory.java
----------------------------------------------------------------------
diff --git a/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/tokenizer/DelimitedUTF8StringBinaryTokenizerFactory.java b/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/tokenizer/DelimitedUTF8StringBinaryTokenizerFactory.java
deleted file mode 100644
index ba49e5c..0000000
--- a/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/tokenizer/DelimitedUTF8StringBinaryTokenizerFactory.java
+++ /dev/null
@@ -1,40 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.asterix.fuzzyjoin.tokenizer;
-
-public class DelimitedUTF8StringBinaryTokenizerFactory implements IBinaryTokenizerFactory {
-
-    private static final long serialVersionUID = 1L;
-    private final boolean ignoreTokenCount;
-    private final boolean sourceHasTypeTag;
-    private final ITokenFactory tokenFactory;
-
-    public DelimitedUTF8StringBinaryTokenizerFactory(boolean ignoreTokenCount, boolean sourceHasTypeTag,
-            ITokenFactory tokenFactory) {
-        this.ignoreTokenCount = ignoreTokenCount;
-        this.sourceHasTypeTag = sourceHasTypeTag;
-        this.tokenFactory = tokenFactory;
-    }
-
-    @Override
-    public IBinaryTokenizer createTokenizer() {
-        return new DelimitedUTF8StringBinaryTokenizer(ignoreTokenCount, sourceHasTypeTag, tokenFactory);
-    }
-}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/742aba85/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/tokenizer/HashedUTF8NGramToken.java
----------------------------------------------------------------------
diff --git a/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/tokenizer/HashedUTF8NGramToken.java b/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/tokenizer/HashedUTF8NGramToken.java
deleted file mode 100644
index 786d07b..0000000
--- a/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/tokenizer/HashedUTF8NGramToken.java
+++ /dev/null
@@ -1,62 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.asterix.fuzzyjoin.tokenizer;
-
-import java.io.DataOutput;
-import java.io.IOException;
-
-public class HashedUTF8NGramToken extends UTF8NGramToken {
-    public HashedUTF8NGramToken(byte tokenTypeTag, byte countTypeTag) {
-        super(tokenTypeTag, countTypeTag);
-    }
-
-    @Override
-    public void serializeToken(DataOutput dos) throws IOException {
-        handleTokenTypeTag(dos);
-
-        int hash = GOLDEN_RATIO_32;
-
-        // pre chars
-        for (int i = 0; i < numPreChars; i++) {
-            hash ^= PRECHAR;
-            hash *= GOLDEN_RATIO_32;
-        }
-
-        // regular chars
-        int numRegGrams = tokenLength - numPreChars - numPostChars;
-        int pos = start;
-        for (int i = 0; i < numRegGrams; i++) {
-            hash ^= StringUtils.toLowerCase(StringUtils.charAt(data, pos));
-            hash *= GOLDEN_RATIO_32;
-            pos += StringUtils.charSize(data, pos);
-        }
-
-        // post chars
-        for (int i = 0; i < numPostChars; i++) {
-            hash ^= POSTCHAR;
-            hash *= GOLDEN_RATIO_32;
-        }
-
-        // token count
-        hash += tokenCount;
-
-        dos.writeInt(hash);
-    }
-}



Mime
View raw message