cassandra-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From "liangsibin (JIRA)" <j...@apache.org>
Subject [jira] [Updated] (CASSANDRA-13266) Bulk loading sometimes is very slow?
Date Sat, 25 Feb 2017 13:30:44 GMT

     [ https://issues.apache.org/jira/browse/CASSANDRA-13266?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]

liangsibin updated CASSANDRA-13266:
-----------------------------------
    Description: 
When I bulk load SSTables created with CQLSSTableWriter, it is sometimes very slow.
I use 2 nodes to write the SSTables and bulk load them:
1. Use CQLSSTableWriter to create the SSTables (60 threads).
2. When a directory exceeds 100000 rows, bulk load that directory (20 threads).
The normal bulk load speed is about 70M/s per node, and bulk loading 141G of SSTables per node takes
90 minutes, but sometimes it is very slow: the same data takes 4 hours. Why?
Here is the code that bulk loads the SSTables:
{code:java}
/**
 * Small JMX client that asks a Cassandra node to bulk load a directory through
 * the {@code org.apache.cassandra.db:type=StorageService} MBean.
 *
 * <p>Each instance owns one JMX connection, opened by the constructor and
 * released by {@link #close()}. The class implements {@link AutoCloseable} so
 * callers may use try-with-resources; calling {@code close()} explicitly keeps
 * working as before.
 */
public class JmxBulkLoader implements AutoCloseable {

	static final Logger LOGGER = LoggerFactory.getLogger(JmxBulkLoader.class);
	private JMXConnector connector;
	private StorageServiceMBean storageBean;
	private final Timer timer = new Timer();

	/**
	 * Opens a JMX connection to the given node.
	 *
	 * @param host JMX host name or address of the node
	 * @param port JMX port of the node
	 * @throws Exception if the JMX URL is invalid or the connection cannot be established
	 */
	public JmxBulkLoader(String host, int port) throws Exception {
		connect(host, port);
	}

	/**
	 * Connects to the node's JMX endpoint and creates the StorageService proxy.
	 *
	 * <p>If anything fails after the connector has been opened, the connector is
	 * closed before the exception propagates. The constructor fails in that case,
	 * so no caller would ever be able to reach {@link #close()}.
	 */
	private void connect(String host, int port) throws IOException, MalformedObjectNameException {
		JMXServiceURL jmxUrl = new JMXServiceURL(
				String.format("service:jmx:rmi:///jndi/rmi://%s:%d/jmxrmi", host, port));
		Map<String, Object> env = new HashMap<String, Object>();
		JMXConnector opened = JMXConnectorFactory.connect(jmxUrl, env);
		boolean initialized = false;
		try {
			MBeanServerConnection mbeanServerConn = opened.getMBeanServerConnection();
			ObjectName name = new ObjectName("org.apache.cassandra.db:type=StorageService");
			storageBean = JMX.newMBeanProxy(mbeanServerConn, name, StorageServiceMBean.class);
			connector = opened;
			initialized = true;
		} finally {
			if (!initialized) {
				closeAfterFailedConnect(opened);
			}
		}
	}

	/** Closes a half-initialised connector without masking the exception that caused the failure. */
	private static void closeAfterFailedConnect(JMXConnector opened) {
		try {
			opened.close();
		} catch (IOException e) {
			LOGGER.warn("failed to close JMX connector after connect error", e);
		}
	}

	/**
	 * Closes the underlying JMX connection.
	 *
	 * @throws IOException if the connector reports an error while closing
	 */
	@Override
	public void close() throws IOException {
		connector.close();
	}

	/**
	 * Invokes {@code bulkLoad(path)} on the remote StorageService MBean and logs
	 * how long the call took. The call is synchronous: the elapsed time is only
	 * logged when the remote call returns normally.
	 *
	 * @param path directory to load; only its last path component is logged
	 */
	public void bulkLoad(String path) {
		// Resolve the short name once; it is used in both log lines.
		String name = new Path(path).getName();
		LOGGER.info("begin load data to cassandra {}", name);
		timer.start();
		storageBean.bulkLoad(path);
		timer.end();
		LOGGER.info("bulk load took {}ms, path: {}", timer.getTimeTakenMillis(), name);
	}
}
{code}
bulkload thread
{code:java} 
/**
 * Runnable that performs a single bulk load: it opens a {@code JmxBulkLoader}
 * for the configured JMX endpoint, loads one path, and closes the loader again.
 *
 * <p>Failures are not propagated to the executing thread; they are printed to
 * standard error via {@code printStackTrace()}.
 */
public class BulkThread implements Runnable {

	private String path;
	private String jmxHost;
	private int jmxPort;

	/**
	 * @param path    directory to bulk load
	 * @param jmxHost JMX host of the target node
	 * @param jmxPort JMX port of the target node
	 */
	public BulkThread(String path, String jmxHost, int jmxPort) {
		this.path = path;
		this.jmxHost = jmxHost;
		this.jmxPort = jmxPort;
	}

	/** Connects, bulk loads {@link #path}, and always attempts to close the connection. */
	@Override
	public void run() {
		JmxBulkLoader loader = null;
		try {
			loader = new JmxBulkLoader(jmxHost, jmxPort);
			loader.bulkLoad(path);
		} catch (Exception loadFailure) {
			loadFailure.printStackTrace();
		} finally {
			// loader stays null when the constructor itself failed; nothing to close then.
			if (loader != null) {
				try {
					loader.close();
				} catch (IOException closeFailure) {
					closeFailure.printStackTrace();
				}
			}
		}
	}
}
{code}

  was:
When I bulk load SSTables created with CQLSSTableWriter, it is sometimes very slow.
I use 2 nodes to write the SSTables and bulk load them:
1. Use CQLSSTableWriter to create the SSTables (60 threads).
2. When a directory exceeds 100000 rows, bulk load that directory (20 threads).
The normal bulk load speed is about 70M/s per node, and bulk loading 141G of SSTables takes 90 minutes,
but sometimes it is very slow: the same data takes 4 hours. Why?
Here is the code that bulk loads the SSTables:
{code:java}
/**
 * Small JMX client that asks a Cassandra node to bulk load a directory through
 * the {@code org.apache.cassandra.db:type=StorageService} MBean.
 *
 * <p>Each instance owns one JMX connection, opened by the constructor and
 * released by {@link #close()}. The class implements {@link AutoCloseable} so
 * callers may use try-with-resources; calling {@code close()} explicitly keeps
 * working as before.
 */
public class JmxBulkLoader implements AutoCloseable {

	static final Logger LOGGER = LoggerFactory.getLogger(JmxBulkLoader.class);
	private JMXConnector connector;
	private StorageServiceMBean storageBean;
	private final Timer timer = new Timer();

	/**
	 * Opens a JMX connection to the given node.
	 *
	 * @param host JMX host name or address of the node
	 * @param port JMX port of the node
	 * @throws Exception if the JMX URL is invalid or the connection cannot be established
	 */
	public JmxBulkLoader(String host, int port) throws Exception {
		connect(host, port);
	}

	/**
	 * Connects to the node's JMX endpoint and creates the StorageService proxy.
	 *
	 * <p>If anything fails after the connector has been opened, the connector is
	 * closed before the exception propagates. The constructor fails in that case,
	 * so no caller would ever be able to reach {@link #close()}.
	 */
	private void connect(String host, int port) throws IOException, MalformedObjectNameException {
		JMXServiceURL jmxUrl = new JMXServiceURL(
				String.format("service:jmx:rmi:///jndi/rmi://%s:%d/jmxrmi", host, port));
		Map<String, Object> env = new HashMap<String, Object>();
		JMXConnector opened = JMXConnectorFactory.connect(jmxUrl, env);
		boolean initialized = false;
		try {
			MBeanServerConnection mbeanServerConn = opened.getMBeanServerConnection();
			ObjectName name = new ObjectName("org.apache.cassandra.db:type=StorageService");
			storageBean = JMX.newMBeanProxy(mbeanServerConn, name, StorageServiceMBean.class);
			connector = opened;
			initialized = true;
		} finally {
			if (!initialized) {
				closeAfterFailedConnect(opened);
			}
		}
	}

	/** Closes a half-initialised connector without masking the exception that caused the failure. */
	private static void closeAfterFailedConnect(JMXConnector opened) {
		try {
			opened.close();
		} catch (IOException e) {
			LOGGER.warn("failed to close JMX connector after connect error", e);
		}
	}

	/**
	 * Closes the underlying JMX connection.
	 *
	 * @throws IOException if the connector reports an error while closing
	 */
	@Override
	public void close() throws IOException {
		connector.close();
	}

	/**
	 * Invokes {@code bulkLoad(path)} on the remote StorageService MBean and logs
	 * how long the call took. The call is synchronous: the elapsed time is only
	 * logged when the remote call returns normally.
	 *
	 * @param path directory to load; only its last path component is logged
	 */
	public void bulkLoad(String path) {
		// Resolve the short name once; it is used in both log lines.
		String name = new Path(path).getName();
		LOGGER.info("begin load data to cassandra {}", name);
		timer.start();
		storageBean.bulkLoad(path);
		timer.end();
		LOGGER.info("bulk load took {}ms, path: {}", timer.getTimeTakenMillis(), name);
	}
}
{code}
bulkload thread
{code:java} 
/**
 * Runnable that performs a single bulk load: it opens a {@code JmxBulkLoader}
 * for the configured JMX endpoint, loads one path, and closes the loader again.
 *
 * <p>Failures are not propagated to the executing thread; they are printed to
 * standard error via {@code printStackTrace()}.
 */
public class BulkThread implements Runnable {

	private String path;
	private String jmxHost;
	private int jmxPort;

	/**
	 * @param path    directory to bulk load
	 * @param jmxHost JMX host of the target node
	 * @param jmxPort JMX port of the target node
	 */
	public BulkThread(String path, String jmxHost, int jmxPort) {
		this.path = path;
		this.jmxHost = jmxHost;
		this.jmxPort = jmxPort;
	}

	/** Connects, bulk loads {@link #path}, and always attempts to close the connection. */
	@Override
	public void run() {
		JmxBulkLoader loader = null;
		try {
			loader = new JmxBulkLoader(jmxHost, jmxPort);
			loader.bulkLoad(path);
		} catch (Exception loadFailure) {
			loadFailure.printStackTrace();
		} finally {
			// loader stays null when the constructor itself failed; nothing to close then.
			if (loader != null) {
				try {
					loader.close();
				} catch (IOException closeFailure) {
					closeFailure.printStackTrace();
				}
			}
		}
	}
}
{code}


> Bulk loading sometimes is very slow?
> ------------------------------------
>
>                 Key: CASSANDRA-13266
>                 URL: https://issues.apache.org/jira/browse/CASSANDRA-13266
>             Project: Cassandra
>          Issue Type: Improvement
>            Reporter: liangsibin
>
> When I bulk load SSTables created with CQLSSTableWriter, it is sometimes very slow.
> I use 2 nodes to write the SSTables and bulk load them:
> 1. Use CQLSSTableWriter to create the SSTables (60 threads).
> 2. When a directory exceeds 100000 rows, bulk load that directory (20 threads).
> The normal bulk load speed is about 70M/s per node, and bulk loading 141G of SSTables per node
> takes 90 minutes, but sometimes it is very slow: the same data takes 4 hours. Why?
> Here is the code that bulk loads the SSTables:
> {code:java}
> public class JmxBulkLoader {
> 	
>     static final Logger LOGGER = LoggerFactory.getLogger(JmxBulkLoader.class);
> 	private JMXConnector connector;
> 	private StorageServiceMBean storageBean;
> 	private Timer timer = new Timer();
> 	public JmxBulkLoader(String host, int port) throws Exception {
> 		connect(host, port);
> 	}
> 	private void connect(String host, int port) throws IOException, MalformedObjectNameException
{
> 		JMXServiceURL jmxUrl = new JMXServiceURL(
> 				String.format("service:jmx:rmi:///jndi/rmi://%s:%d/jmxrmi", host, port));
> 		Map<String, Object> env = new HashMap<String, Object>();
> 		connector = JMXConnectorFactory.connect(jmxUrl, env);
> 		MBeanServerConnection mbeanServerConn = connector.getMBeanServerConnection();
> 		ObjectName name = new ObjectName("org.apache.cassandra.db:type=StorageService");
> 		storageBean = JMX.newMBeanProxy(mbeanServerConn, name, StorageServiceMBean.class);
> 	}
> 	public void close() throws IOException {
> 		connector.close();
> 	}
> 	public void bulkLoad(String path) {
> 		LOGGER.info("begin load data to cassandra " + new Path(path).getName());
> 		timer.start();
> 		storageBean.bulkLoad(path);
> 		timer.end();
> 		LOGGER.info("bulk load took " + timer.getTimeTakenMillis() + "ms, path: " + new Path(path).getName());
> 	}
> }
> {code}
> bulkload thread
> {code:java} 
> public class BulkThread implements Runnable {
> 	private String path;
> 	private String jmxHost;
> 	private int jmxPort;
> 	
> 	public BulkThread(String path, String jmxHost, int jmxPort) {
> 		super();
> 		this.path = path;
> 		this.jmxHost = jmxHost;
> 		this.jmxPort = jmxPort;
> 	}
> 	@Override
> 	public void run() {
> 		JmxBulkLoader bulkLoader = null;
> 		try {
> 			bulkLoader = new JmxBulkLoader(jmxHost, jmxPort);
> 			bulkLoader.bulkLoad(path);
> 		} catch (Exception e) {
> 			e.printStackTrace();
> 		} finally {
> 			if (bulkLoader != null)
> 				try {
> 					bulkLoader.close();
> 					bulkLoader = null;
> 				} catch (IOException e) {
> 					e.printStackTrace();
> 				}
> 		}
> 	}
> }
> {code}



--
This message was sent by Atlassian JIRA
(v6.3.15#6346)

Mime
View raw message