pulsar-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From GitBox <...@apache.org>
Subject [GitHub] jerrypeng commented on a change in pull request #2605: implement topic routing on a per record basis
Date Wed, 19 Sep 2018 17:45:59 GMT
jerrypeng commented on a change in pull request #2605: implement topic routing on a per record
basis
URL: https://github.com/apache/incubator-pulsar/pull/2605#discussion_r218900232
 
 

 ##########
 File path: pulsar-functions/instance/src/main/java/org/apache/pulsar/functions/sink/PulsarSink.java
 ##########
 @@ -60,140 +62,153 @@
     private final String fqfn;
 
     private interface PulsarSinkProcessor<T> {
-        void initializeOutputProducer(String outputTopic, Schema<T> schema, String
fqfn) throws Exception;
 
         TypedMessageBuilder<T> newMessage(Record<T> record) throws Exception;
 
         void sendOutputMessage(TypedMessageBuilder<T> msg, Record<T> record)
throws Exception;
 
-        abstract void close() throws Exception;
+        void close() throws Exception;
     }
 
-    private class PulsarSinkAtMostOnceProcessor implements PulsarSinkProcessor<T> {
-        private Producer<T> producer;
+    private abstract class PulsarSinkProcessorBase implements PulsarSinkProcessor<T>
{
+        protected Map<String, Producer<T>> publishProducers = new ConcurrentHashMap<>();
+        protected Schema schema;
 
-        @Override
-        public void initializeOutputProducer(String outputTopic, Schema<T> schema,
String fqfn) throws Exception {
-            this.producer = AbstractOneOuputTopicProducers.createProducer(
-                    client, pulsarSinkConfig.getTopic(), null, schema, fqfn);
+        protected PulsarSinkProcessorBase(Schema schema) {
+            this.schema = schema;
         }
 
-        @Override
-        public TypedMessageBuilder<T> newMessage(Record<T> record) {
-            return producer.newMessage();
+        public <T> Producer<T> createProducer(PulsarClient client, String topic,
String producerName, Schema<T> schema, String fqfn)
+                throws PulsarClientException {
+            ProducerBuilder<T> builder = client.newProducer(schema)
+                    .blockIfQueueFull(true)
+                    .enableBatching(true)
+                    .batchingMaxPublishDelay(1, TimeUnit.MILLISECONDS)
+                    .compressionType(CompressionType.LZ4)
+                    .hashingScheme(HashingScheme.Murmur3_32Hash) //
+                    .messageRoutingMode(MessageRoutingMode.CustomPartition)
+                    .messageRouter(FunctionResultRouter.of())
+                    .topic(topic);
+            if (producerName != null) {
+                builder.producerName(producerName);
+            }
+
+            return builder
+                    .property("application", "pulsarfunction")
+                    .property("fqfn", fqfn).create();
         }
 
-        @Override
-        public void sendOutputMessage(TypedMessageBuilder<T> msg, Record<T> record)
throws Exception {
-            msg.sendAsync();
+        protected Producer<T> getProducer(String destinationTopic) {
+            return getProducer(destinationTopic, null, destinationTopic);
         }
 
-        @Override
-        public void close() throws Exception {
-            if (null != producer) {
+        protected Producer<T> getProducer(String producerId, String producerName, String
topicName) {
+
+            Producer<T> producer = publishProducers.get(producerId);
+
+            if (producer == null) {
                 try {
-                    producer.close();
+                    Producer<T> newProducer = createProducer(
+                            client,
+                            topicName,
+                            producerName,
+                            schema,
+                            fqfn);
+
+                    Producer<T> existingProducer = publishProducers.putIfAbsent(producerId,
newProducer);
 
 Review comment:
   yes because a message can have a any destination topic.  At the time of processing, we
may or maybe not have already created a producer for that topic.  We cache producers, so that
we don't create a producer for each message and try to reuse existing ones.  This is basically
the same logic for MultiConsumerProducer used for effectively once where we have to have separate
producers for each partition

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

Mime
View raw message