incubator-connectors-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From conflue...@apache.org
Subject [CONF] Apache Connectors Framework > How to Write an Output Connector
Date Sat, 23 Oct 2010 11:43:00 GMT
Space: Apache Connectors Framework (https://cwiki.apache.org/confluence/display/CONNECTORS)
Page: How to Write an Output Connector (https://cwiki.apache.org/confluence/display/CONNECTORS/How+to+Write+an+Output+Connector)

Comment edited by Farzad :
---------------------------------------------------------------------
Thanks for your answer.  I tried that way first and ran into problems. I have to execute multiple
transactions, depending on whether a dup is found.  So I just tried two try/catch blocks using a flag, instead
of the nested version I first tried.  I'm getting these errors now:

{noformat}
Thread[Worker thread '44',5,main]: startTransaction: org.apache.manifoldcf.core.cachemanager.CacheManager@bc83c3:
Transaction hash = {}
org.apache.manifoldcf.core.interfaces.ManifoldCFException: Illegal parent transaction ID:
1287833203886
 at org.apache.manifoldcf.core.cachemanager.CacheManager.startTransaction(CacheManager.java:687)
 at org.apache.manifoldcf.core.database.Database.beginTransaction(Database.java:204)
 at org.apache.manifoldcf.core.database.DBInterfacePostgreSQL.beginTransaction(DBInterfacePostgreSQL.java:995)
 at org.apache.manifoldcf.core.database.DBInterfacePostgreSQL.beginTransaction(DBInterfacePostgreSQL.java:966)
 at org.apache.manifoldcf.core.database.BaseTable.beginTransaction(BaseTable.java:258)
 at org.apache.manifoldcf.agents.output.dupfinder.DataManager.insertData(DataManager.java:120)
 at org.apache.manifoldcf.agents.output.dupfinder.DupFinderConnector.addOrReplaceDocument(DupFinderConnector.java:78)
 at org.apache.manifoldcf.agents.incrementalingest.IncrementalIngester.addOrReplaceDocument(IncrementalIngester.java:1424)
 at org.apache.manifoldcf.agents.incrementalingest.IncrementalIngester.performIngestion(IncrementalIngester.java:409)
 at org.apache.manifoldcf.agents.incrementalingest.IncrementalIngester.documentIngest(IncrementalIngester.java:304)
 at org.apache.manifoldcf.crawler.system.WorkerThread$ProcessActivity.ingestDocument(WorkerThread.java:1586)
 at org.apache.manifoldcf.crawler.connectors.filesystem.FileConnector.processDocuments(FileConnector.java:275)
 at org.apache.manifoldcf.crawler.connectors.BaseRepositoryConnector.processDocuments(BaseRepositoryConnector.java:423)
 at org.apache.manifoldcf.crawler.system.WorkerThread.run(WorkerThread.java:585)
Thread[Worker thread '21',5,main]: invalidateKeys: 1287833215335: org.apache.manifoldcf.core.cachemanager.CacheManager@1dbe8b:

Transaction hash = {1287833215337=org.apache.manifoldcf.core.cachemanager.CacheManager$CacheTransactionHandle@1a7f162,
1287833197104=org.apache.manifoldcf.core.cachemanager.CacheManager$CacheTransactionHandle@c8b88f}
org.apache.manifoldcf.core.interfaces.ManifoldCFException: Bad transaction ID!
 at org.apache.manifoldcf.core.cachemanager.CacheManager.invalidateKeys(CacheManager.java:613)
 at org.apache.manifoldcf.core.cachemanager.CacheManager.findObjectsAndExecute(CacheManager.java:175)
 at org.apache.manifoldcf.core.database.Database.executeQuery(Database.java:167)
 at org.apache.manifoldcf.core.database.DBInterfacePostgreSQL.performModification(DBInterfacePostgreSQL.java:586)
 at org.apache.manifoldcf.core.database.DBInterfacePostgreSQL.performInsert(DBInterfacePostgreSQL.java:133)
 at org.apache.manifoldcf.core.database.BaseTable.performInsert(BaseTable.java:76)
 at org.apache.manifoldcf.agents.output.dupfinder.DataManager.insertData(DataManager.java:122)
 at org.apache.manifoldcf.agents.output.dupfinder.DupFinderConnector.addOrReplaceDocument(DupFinderConnector.java:78)
 at org.apache.manifoldcf.agents.incrementalingest.IncrementalIngester.addOrReplaceDocument(IncrementalIngester.java:1424)
 at org.apache.manifoldcf.agents.incrementalingest.IncrementalIngester.performIngestion(IncrementalIngester.java:409)
 at org.apache.manifoldcf.agents.incrementalingest.IncrementalIngester.documentIngest(IncrementalIngester.java:304)
 at org.apache.manifoldcf.crawler.system.WorkerThread$ProcessActivity.ingestDocument(WorkerThread.java:1586)
 at org.apache.manifoldcf.crawler.connectors.filesystem.FileConnector.processDocuments(FileConnector.java:275)
 at org.apache.manifoldcf.crawler.connectors.BaseRepositoryConnector.processDocuments(BaseRepositoryConnector.java:423)
 at org.apache.manifoldcf.crawler.system.WorkerThread.run(WorkerThread.java:585)
{noformat}

Here is the code I'm executing.  Also, what is the second parameter of performInsert (the
invalidate keys) used for?
{code}
beginTransaction();
try {
	performInsert(map, null);
} catch (ManifoldCFException e) {
	// According to Karl, the only two reasons a ManifoldCF exception is thrown
	// after a performInsert are a constraint violation or a deadlock situation.
	// He continued to say that unless you are in the middle of a transaction, you will
	// only encounter the constraint violation.
	signalRollback();
	if (e.getErrorCode() == ManifoldCFException.DATABASE_TRANSACTION_ABORT) {
		isDuplicate = true;
	} else {
		// We got a different error code, that needs to be addressed.
		throw e;
	}
} catch (Error e) {
	signalRollback();
	throw e;
} finally {
	endTransaction();
}

if (isDuplicate) {
	System.out.println("[" + currentContext.get("id") + "] Duplicate found, retrying with newDupNum");
	ArrayList list = new ArrayList();
	list.add(hashsumVal);
	IResultSet result = performQuery("SELECT max(" + dupnum + ") FROM " + getTableName() + "
WHERE " + hashsum + "=?", list, null, null);
	if (result.getRowCount() == 1) {
		System.out.println("[" + currentContext.get("id") + "] " + "Found the highest dup number,
result set contains " + result.getRowCount() + " row");
		IResultRow row = result.getRow(0);
		Integer oldDupNum = (Integer) row.getValue("max");
		int newDupNum = oldDupNum.intValue() + 1;
		System.out.println("[" + currentContext.get("id") + "] " + "oldDupNum=" + oldDupNum + ",
newDupNum=" + newDupNum);
		map.put(dupnum, new Integer(newDupNum));
		beginTransaction();
		try {
			performInsert(map, null);
		} catch (ManifoldCFException f) {
			signalRollback();
			throw f;
		} catch (Error f) {
			signalRollback();
			throw f;
		} finally {
			endTransaction();
		}
	} else {
		// This case happens when either no rows or more than one row is returned for the
		// query. It should never happen, because we are looking for a max and the fact
		// that the initial insert failed says there is at least one row with a value.
	}
}

{code}

Comment was previously :
---------------------------------------------------------------------
Thanks for your answer.  I tried that way first and ran into problems. I have to execute multiple
transactions, depending on whether a dup is found.  So I just tried two try/catch blocks using a flag, instead
of the nested version I first tried.  I'm getting these errors now:

{noformat}
Thread[Worker thread '44',5,main]: startTransaction: org.apache.manifoldcf.core.cachemanager.CacheManager@bc83c3:
Transaction hash = {}
org.apache.manifoldcf.core.interfaces.ManifoldCFException: Illegal parent transaction ID:
1287833203886
 at org.apache.manifoldcf.core.cachemanager.CacheManager.startTransaction(CacheManager.java:687)
 at org.apache.manifoldcf.core.database.Database.beginTransaction(Database.java:204)
 at org.apache.manifoldcf.core.database.DBInterfacePostgreSQL.beginTransaction(DBInterfacePostgreSQL.java:995)
 at org.apache.manifoldcf.core.database.DBInterfacePostgreSQL.beginTransaction(DBInterfacePostgreSQL.java:966)
 at org.apache.manifoldcf.core.database.BaseTable.beginTransaction(BaseTable.java:258)
 at org.apache.manifoldcf.agents.output.dupfinder.DataManager.insertData(DataManager.java:120)
 at org.apache.manifoldcf.agents.output.dupfinder.DupFinderConnector.addOrReplaceDocument(DupFinderConnector.java:78)
 at org.apache.manifoldcf.agents.incrementalingest.IncrementalIngester.addOrReplaceDocument(IncrementalIngester.java:1424)
 at org.apache.manifoldcf.agents.incrementalingest.IncrementalIngester.performIngestion(IncrementalIngester.java:409)
 at org.apache.manifoldcf.agents.incrementalingest.IncrementalIngester.documentIngest(IncrementalIngester.java:304)
 at org.apache.manifoldcf.crawler.system.WorkerThread$ProcessActivity.ingestDocument(WorkerThread.java:1586)
 at org.apache.manifoldcf.crawler.connectors.filesystem.FileConnector.processDocuments(FileConnector.java:275)
 at org.apache.manifoldcf.crawler.connectors.BaseRepositoryConnector.processDocuments(BaseRepositoryConnector.java:423)
 at org.apache.manifoldcf.crawler.system.WorkerThread.run(WorkerThread.java:585)
Thread[Worker thread '21',5,main]: invalidateKeys: 1287833215335: org.apache.manifoldcf.core.cachemanager.CacheManager@1dbe8b:
Transaction hash = {1287833215337=org.apache.manifoldcf.core.cachemanager.CacheManager$CacheTransactionHandle@1a7f162,
1287833197104=org.apache.manifoldcf.core.cachemanager.CacheManager$CacheTransactionHandle@c8b88f}
org.apache.manifoldcf.core.interfaces.ManifoldCFException: Bad transaction ID!
 at org.apache.manifoldcf.core.cachemanager.CacheManager.invalidateKeys(CacheManager.java:613)
 at org.apache.manifoldcf.core.cachemanager.CacheManager.findObjectsAndExecute(CacheManager.java:175)
 at org.apache.manifoldcf.core.database.Database.executeQuery(Database.java:167)
 at org.apache.manifoldcf.core.database.DBInterfacePostgreSQL.performModification(DBInterfacePostgreSQL.java:586)
 at org.apache.manifoldcf.core.database.DBInterfacePostgreSQL.performInsert(DBInterfacePostgreSQL.java:133)
 at org.apache.manifoldcf.core.database.BaseTable.performInsert(BaseTable.java:76)
 at org.apache.manifoldcf.agents.output.dupfinder.DataManager.insertData(DataManager.java:122)
 at org.apache.manifoldcf.agents.output.dupfinder.DupFinderConnector.addOrReplaceDocument(DupFinderConnector.java:78)
 at org.apache.manifoldcf.agents.incrementalingest.IncrementalIngester.addOrReplaceDocument(IncrementalIngester.java:1424)
 at org.apache.manifoldcf.agents.incrementalingest.IncrementalIngester.performIngestion(IncrementalIngester.java:409)
 at org.apache.manifoldcf.agents.incrementalingest.IncrementalIngester.documentIngest(IncrementalIngester.java:304)
 at org.apache.manifoldcf.crawler.system.WorkerThread$ProcessActivity.ingestDocument(WorkerThread.java:1586)
 at org.apache.manifoldcf.crawler.connectors.filesystem.FileConnector.processDocuments(FileConnector.java:275)
 at org.apache.manifoldcf.crawler.connectors.BaseRepositoryConnector.processDocuments(BaseRepositoryConnector.java:423)
 at org.apache.manifoldcf.crawler.system.WorkerThread.run(WorkerThread.java:585)
{noformat}

Here is the code I'm executing.  Also, what is the second parameter of performInsert (the
invalidate keys) used for?
{code}
beginTransaction();
try {
	performInsert(map, null);
} catch (ManifoldCFException e) {
	// According to Karl, the only two reasons a ManifoldCF exception is thrown
	// after a performInsert are a constraint violation or a deadlock situation.
	// He continued to say that unless you are in the middle of a transaction, you will
	// only encounter the constraint violation.
	signalRollback();
	if (e.getErrorCode() == ManifoldCFException.DATABASE_TRANSACTION_ABORT) {
		isDuplicate = true;
	} else {
		// We got a different error code, that needs to be addressed.
		throw e;
	}
} catch (Error e) {
	signalRollback();
	throw e;
} finally {
	endTransaction();
}

if (isDuplicate) {
	System.out.println("[" + currentContext.get("id") + "] Duplicate found, retrying with newDupNum");
	ArrayList list = new ArrayList();
	list.add(hashsumVal);
	IResultSet result = performQuery("SELECT max(" + dupnum + ") FROM " + getTableName() + "
WHERE " + hashsum + "=?", list, null, null);
	if (result.getRowCount() == 1) {
		System.out.println("[" + currentContext.get("id") + "] " + "Found the highest dup number,
result set contains " + result.getRowCount() + " row");
		IResultRow row = result.getRow(0);
		Integer oldDupNum = (Integer) row.getValue("max");
		int newDupNum = oldDupNum.intValue() + 1;
		System.out.println("[" + currentContext.get("id") + "] " + "oldDupNum=" + oldDupNum + ",
newDupNum=" + newDupNum);
		map.put(dupnum, new Integer(newDupNum));
		beginTransaction();
		try {
			performInsert(map, null);
		} catch (ManifoldCFException f) {
			signalRollback();
			throw f;
		} catch (Error f) {
			signalRollback();
			throw f;
		} finally {
			endTransaction();
		}
	} else {
		// This case happens when either no rows or more than one row is returned for the
		// query. It should never happen, because we are looking for a max and the fact
		// that the initial insert failed says there is at least one row with a value.
	}
}

{code}

Change your notification preferences: https://cwiki.apache.org/confluence/users/viewnotifications.action

Mime
View raw message