zipkin-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From adrianc...@apache.org
Subject [incubator-zipkin] 01/01: Adds cheap string encoding for IpV4 path used in protobuf
Date Sat, 11 May 2019 09:13:52 GMT
This is an automated email from the ASF dual-hosted git repository.

adriancole pushed a commit to branch cheap-ipv4
in repository https://gitbox.apache.org/repos/asf/incubator-zipkin.git

commit d723c53d6500e97cc2b912efe8034823ec2352f6
Author: Adrian Cole <acole@pivotal.io>
AuthorDate: Sat May 11 17:11:58 2019 +0800

    Adds cheap string encoding for IpV4 path used in protobuf
    
    Commonly, we parse protobuf, but write json. This implies reading bytes
    and encoding IP literals. This improves performance of IpV4 address
    encoding and tries to offset the extra bytecode by removing some elsewhere.
---
 .../src/main/java/zipkin2/EndpointBenchmarks.java  | 13 ++++-
 .../java/zipkin2/codec/ProtoCodecBenchmarks.java   |  2 +-
 zipkin/src/main/java/zipkin2/Endpoint.java         | 67 +++++++++++++++-------
 zipkin/src/main/java/zipkin2/internal/Buffer.java  |  6 --
 .../src/main/java/zipkin2/internal/JsonCodec.java  | 11 +---
 .../main/java/zipkin2/internal/ThriftCodec.java    |  1 +
 zipkin/src/test/java/zipkin2/EndpointTest.java     | 28 ++++++++-
 7 files changed, 87 insertions(+), 41 deletions(-)

diff --git a/benchmarks/src/main/java/zipkin2/EndpointBenchmarks.java b/benchmarks/src/main/java/zipkin2/EndpointBenchmarks.java
index afdc9a1..690c033 100644
--- a/benchmarks/src/main/java/zipkin2/EndpointBenchmarks.java
+++ b/benchmarks/src/main/java/zipkin2/EndpointBenchmarks.java
@@ -39,10 +39,10 @@ import org.openjdk.jmh.runner.options.OptionsBuilder;
 @Measurement(iterations = 5, time = 1)
 @Warmup(iterations = 10, time = 1)
 @Fork(3)
-@BenchmarkMode(Mode.AverageTime)
+@BenchmarkMode(Mode.SampleTime)
 @OutputTimeUnit(TimeUnit.MICROSECONDS)
 @State(Scope.Thread)
-@Threads(1)
+@Threads(2)
 public class EndpointBenchmarks {
   static final String IPV4 = "43.0.192.2", IPV6 = "2001:db8::c001";
   static final InetAddress IPV4_ADDR, IPV6_ADDR;
@@ -66,6 +66,10 @@ public class EndpointBenchmarks {
     return builder.parseIp(IPV4_ADDR);
   }
 
+  @Benchmark public boolean parseIpv4_bytes() {
+    return builder.parseIp(IPV4_ADDR.getAddress());
+  }
+
   @Benchmark public boolean parseIpv6_literal() {
     return builder.parseIp(IPV6);
   }
@@ -74,9 +78,14 @@ public class EndpointBenchmarks {
     return builder.parseIp(IPV6_ADDR);
   }
 
+  @Benchmark public boolean parseIpv6_bytes() {
+    return builder.parseIp(IPV6_ADDR.getAddress());
+  }
+
   // Convenience main entry-point
   public static void main(String[] args) throws RunnerException {
     Options opt = new OptionsBuilder()
+      .addProfiler("gc")
       .include(".*" + EndpointBenchmarks.class.getSimpleName())
       .build();
 
diff --git a/benchmarks/src/main/java/zipkin2/codec/ProtoCodecBenchmarks.java b/benchmarks/src/main/java/zipkin2/codec/ProtoCodecBenchmarks.java
index 85fb28c..a863158 100644
--- a/benchmarks/src/main/java/zipkin2/codec/ProtoCodecBenchmarks.java
+++ b/benchmarks/src/main/java/zipkin2/codec/ProtoCodecBenchmarks.java
@@ -103,7 +103,7 @@ public class ProtoCodecBenchmarks {
   // Convenience main entry-point
   public static void main(String[] args) throws Exception {
     Options opt = new OptionsBuilder()
-      .include(".*" + ProtoCodecBenchmarks.class.getSimpleName() + ".*bytes_zipkin")
+      .include(".*" + ProtoCodecBenchmarks.class.getSimpleName() + ".*bytes.*")
       .addProfiler("gc")
       .build();
 
diff --git a/zipkin/src/main/java/zipkin2/Endpoint.java b/zipkin/src/main/java/zipkin2/Endpoint.java
index ae17473..af29260 100644
--- a/zipkin/src/main/java/zipkin2/Endpoint.java
+++ b/zipkin/src/main/java/zipkin2/Endpoint.java
@@ -177,18 +177,13 @@ public final class Endpoint implements Serializable { // for Spark and Flink job
     /**
      * Like {@link #parseIp(String)} except this accepts a byte array.
      *
-     * @param ipBytes byte array whose ownership is exclusively transfered to this endpoint.
+     * @param ipBytes byte array whose ownership is exclusively transferred to this endpoint.
      */
     public final boolean parseIp(byte[] ipBytes) {
       if (ipBytes == null) return false;
       if (ipBytes.length == 4) {
         ipv4Bytes = ipBytes;
-        ipv4 = String.valueOf(
-          ipBytes[0] & 0xff) + '.'
-          + (ipBytes[1] & 0xff) + '.'
-          + (ipBytes[2] & 0xff) + '.'
-          + (ipBytes[3] & 0xff
-        );
+        ipv4 = writeIpV4(ipBytes);
       } else if (ipBytes.length == 16) {
         if (!parseEmbeddedIPv4(ipBytes)) {
           ipv6 = writeIpV6(ipBytes);
@@ -200,6 +195,33 @@ public final class Endpoint implements Serializable { // for Spark and Flink job
       return true;
     }
 
+    static String writeIpV4(byte[] ipBytes) {
+      char[] buf = ipBuffer();
+      int pos = 0;
+      pos = writeBackwards(ipBytes[0] & 0xff, pos, buf);
+      buf[pos++] = '.';
+      pos = writeBackwards(ipBytes[1] & 0xff, pos, buf);
+      buf[pos++] = '.';
+      pos = writeBackwards(ipBytes[2] & 0xff, pos, buf);
+      buf[pos++] = '.';
+      pos = writeBackwards(ipBytes[3] & 0xff, pos, buf);
+      return new String(buf, 0, pos);
+    }
+
+    static int writeBackwards(int b, int pos, char[] buf) {
+      if (b < 10) {
+        buf[pos] = HEX_DIGITS[b];
+        return pos + 1;
+      }
+      int i = pos += b < 100 ? 2 : 3; // We write backwards from right to left.
+      while (b != 0) {
+        int digit = b % 10;
+        buf[--i] = HEX_DIGITS[digit];
+        b /= 10;
+      }
+      return pos;
+    }
+
     /** Chaining variant of {@link #parseIp(String)} */
     public Builder ip(@Nullable String ipString) {
       parseIp(ipString);
@@ -340,19 +362,24 @@ public final class Endpoint implements Serializable { // for Spark and Flink job
     return IpFamily.Unknown;
   }
 
-  private static boolean notHex(char c) {
+  static boolean notHex(char c) {
     return (c < '0' || c > '9') && (c < 'a' || c > 'f') && (c < 'A' || c > 'F');
   }
 
-  private static final ThreadLocal<char[]> IPV6_TO_STRING = new ThreadLocal<char[]>() {
-    @Override protected char[] initialValue() {
-      return new char[39]; // maximum length of encoded ipv6
+  static final ThreadLocal<char[]> IP_BUFFER = new ThreadLocal<>();
+
+  static char[] ipBuffer() {
+    char[] idBuffer = IP_BUFFER.get();
+    if (idBuffer == null) {
+      idBuffer = new char[39]; // maximum length of encoded ipv6
+      IP_BUFFER.set(idBuffer);
     }
-  };
+    return idBuffer;
+  }
 
   static String writeIpV6(byte[] ipv6) {
     int pos = 0;
-    char[] buf = IPV6_TO_STRING.get();
+    char[] buf = ipBuffer();
 
     // Compress the longest string of zeros
     int zeroCompressionIndex = -1;
@@ -414,10 +441,10 @@ public final class Endpoint implements Serializable { // for Spark and Flink job
     {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'};
 
   // Begin code from com.google.common.net.InetAddresses 23
-  private static final int IPV6_PART_COUNT = 8;
+  static final int IPV6_PART_COUNT = 8;
 
   @Nullable
-  private static byte[] textToNumericFormatV6(String ipString) {
+  static byte[] textToNumericFormatV6(String ipString) {
     // An address can have [2..8] colons, and N colons make N+1 parts.
     String[] parts = ipString.split(":", IPV6_PART_COUNT + 2);
     if (parts.length < 3 || parts.length > IPV6_PART_COUNT + 1) {
@@ -480,7 +507,7 @@ public final class Endpoint implements Serializable { // for Spark and Flink job
     return rawBytes.array();
   }
 
-  private static short parseHextet(String ipPart) {
+  static short parseHextet(String ipPart) {
     // Note: we already verified that this string contains only hex digits.
     int hextet = Integer.parseInt(ipPart, 16);
     if (hextet > 0xffff) {
@@ -491,7 +518,7 @@ public final class Endpoint implements Serializable { // for Spark and Flink job
   // End code from com.google.common.net.InetAddresses 23
 
   // Begin code from io.netty.util.NetUtil 4.1
-  private static boolean isValidIpV4Address(String ip, int from, int toExcluded) {
+  static boolean isValidIpV4Address(String ip, int from, int toExcluded) {
     int len = toExcluded - from;
     int i;
     return len <= 15 && len >= 7 &&
@@ -501,7 +528,7 @@ public final class Endpoint implements Serializable { // for Spark and Flink job
       isValidIpV4Word(ip, i + 1, toExcluded);
   }
 
-  private static boolean isValidIpV4Word(CharSequence word, int from, int toExclusive) {
+  static boolean isValidIpV4Word(CharSequence word, int from, int toExclusive) {
     int len = toExclusive - from;
     char c0, c1, c2;
     if (len < 1 || len > 3 || (c0 = word.charAt(from)) < '0') {
@@ -516,7 +543,7 @@ public final class Endpoint implements Serializable { // for Spark and Flink job
     return c0 <= '9' && (len == 1 || isValidNumericChar(word.charAt(from + 1)));
   }
 
-  private static boolean isValidNumericChar(char c) {
+  static boolean isValidNumericChar(char c) {
     return c >= '0' && c <= '9';
   }
   // End code from io.netty.util.NetUtil 4.1
@@ -584,7 +611,7 @@ public final class Endpoint implements Serializable { // for Spark and Flink job
   }
 
   // TODO: replace this with native proto3 encoding
-  private static final class SerializedForm implements Serializable {
+  static final class SerializedForm implements Serializable {
     static final long serialVersionUID = 0L;
 
     final String serviceName, ipv4, ipv6;
diff --git a/zipkin/src/main/java/zipkin2/internal/Buffer.java b/zipkin/src/main/java/zipkin2/internal/Buffer.java
index 925f5ae..c5d9b8b 100644
--- a/zipkin/src/main/java/zipkin2/internal/Buffer.java
+++ b/zipkin/src/main/java/zipkin2/internal/Buffer.java
@@ -135,12 +135,6 @@ public abstract class Buffer {
     data[pos + 1] = (byte) HEX_DIGITS[b & 0xf];
   }
 
-  static void checkNotTruncated(int pos, int lastIndex) {
-    if (pos > lastIndex) {
-      throw new IllegalArgumentException("Truncated reading position " + pos);
-    }
-  }
-
   public abstract void writeByte(int v);
 
   public abstract void write(byte[] v);
diff --git a/zipkin/src/main/java/zipkin2/internal/JsonCodec.java b/zipkin/src/main/java/zipkin2/internal/JsonCodec.java
index 4fe624a..cce9f9a 100644
--- a/zipkin/src/main/java/zipkin2/internal/JsonCodec.java
+++ b/zipkin/src/main/java/zipkin2/internal/JsonCodec.java
@@ -182,15 +182,6 @@ public final class JsonCodec {
         }
       }
 
-      final byte[] bytesWritten;
-      if (lengthWritten == bytes.length) {
-        bytesWritten = bytes;
-      } else {
-        bytesWritten = new byte[lengthWritten];
-        System.arraycopy(bytes, 0, bytesWritten, 0, lengthWritten);
-      }
-
-      String written = new String(bytesWritten, UTF_8);
       // Don't use value directly in the message, as its toString might be implemented using this
       // method. If that's the case, we'd stack overflow. Instead, emit what we've written so far.
       String message =
@@ -200,7 +191,7 @@ public final class JsonCodec {
           value.getClass().getSimpleName(),
           lengthWritten,
           bytes.length,
-          written);
+          new String(bytes, 0, lengthWritten, UTF_8));
       throw Platform.get().assertionError(message, e);
     }
     return b.toByteArrayUnsafe();
diff --git a/zipkin/src/main/java/zipkin2/internal/ThriftCodec.java b/zipkin/src/main/java/zipkin2/internal/ThriftCodec.java
index 3b70f15..ef38c5d 100644
--- a/zipkin/src/main/java/zipkin2/internal/ThriftCodec.java
+++ b/zipkin/src/main/java/zipkin2/internal/ThriftCodec.java
@@ -199,6 +199,7 @@ public final class ThriftCodec {
   }
 
   static String readUtf8(ByteBuffer bytes) {
+    // TODO: optimize out the array copy here
     return new String(readByteArray(bytes), UTF_8);
   }
 
diff --git a/zipkin/src/test/java/zipkin2/EndpointTest.java b/zipkin/src/test/java/zipkin2/EndpointTest.java
index defab78..4f28074 100644
--- a/zipkin/src/test/java/zipkin2/EndpointTest.java
+++ b/zipkin/src/test/java/zipkin2/EndpointTest.java
@@ -56,8 +56,15 @@ public class EndpointTest {
     assertExpectedIpv4(endpoint);
   }
 
-  @Test
-  public void ip_string_ipv4() {
+  @Test public void ip_bytes_ipv4() throws Exception {
+    Endpoint.Builder newBuilder = Endpoint.newBuilder();
+    assertThat(newBuilder.parseIp(Inet4Address.getByName("43.0.192.2").getAddress())).isTrue();
+    Endpoint endpoint = newBuilder.build();
+
+    assertExpectedIpv4(endpoint);
+  }
+
+  @Test public void ip_string_ipv4() {
     Endpoint.Builder newBuilder = Endpoint.newBuilder();
     assertThat(newBuilder.parseIp("43.0.192.2")).isTrue();
     Endpoint endpoint = newBuilder.build();
@@ -93,6 +100,23 @@ public class EndpointTest {
       .containsExactly(Inet6Address.getByName(ipv6).getAddress());
   }
 
+  @Test public void parseIp_ipv6_bytes() throws Exception {
+    String ipv6 = "2001:db8::c001";
+
+    Endpoint.Builder newBuilder = Endpoint.newBuilder();
+    assertThat(newBuilder.parseIp(Inet6Address.getByName(ipv6))).isTrue();
+    Endpoint endpoint = newBuilder.build();
+
+    assertThat(endpoint.ipv4())
+      .isNull();
+    assertThat(endpoint.ipv4Bytes())
+      .isNull();
+    assertThat(endpoint.ipv6())
+      .isEqualTo(ipv6);
+    assertThat(endpoint.ipv6Bytes())
+      .containsExactly(Inet6Address.getByName(ipv6).getAddress());
+  }
+
   @Test public void ip_ipv6_mappedIpv4() {
     String ipv6 = "::FFFF:43.0.192.2";
     Endpoint endpoint = Endpoint.newBuilder().ip(ipv6).build();


Mime
View raw message