Skip to content

Commit 821a79b

Browse files
authored
Merge pull request #756 from Altinity/745-comparison-of-single-threaded-vs-queue-approach-for-memory-usage-snapshot
Added functionality to replicate in single threaded mode based on configuration without using a Queue
2 parents 8c5b04c + 2dacc41 commit 821a79b

File tree

7 files changed

+419
-46
lines changed

7 files changed

+419
-46
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ First two are good tutorials on MySQL and PostgreSQL respectively.
5959
* [Logging](doc/logging.md)
6060
* [Production Setup](doc/production_setup.md)
6161
* [Adding new tables(Incremental Snapshot)](doc/incremental_snapshot.md)
62+
* [Configuration](doc/configuration.md)
6263

6364
### Operations
6465

doc/configuration.md

Lines changed: 33 additions & 32 deletions
Large diffs are not rendered by default.

doc/production_setup.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,11 @@ The maximum number of rows that the connector fetches and reads into memory when
6565

6666
**snapshot.max.threads**: Increase this number from 1 to a higher value to enable parallel snapshotting.
6767

68+
**Single Threaded (Low Memory/Slow replication)**:
69+
When `single.threaded: true` is set in `config.yml`, replication will skip the sink connector queue and thread pool
70+
and will insert batches directly from the Debezium queue.
71+
This mode works on lower-memory setups but will decrease the replication speed.
72+
6873
## PostgreSQL Production Setup
6974

7075
One of the common problems with PostgreSQL is the WAL size increasing.

sink-connector-lightweight/src/main/java/com/altinity/clickhouse/debezium/embedded/cdc/DebeziumChangeEventCapture.java

Lines changed: 27 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
import com.altinity.clickhouse.sink.connector.db.operations.ClickHouseAlterTable;
1414
import com.altinity.clickhouse.sink.connector.executor.ClickHouseBatchExecutor;
1515
import com.altinity.clickhouse.sink.connector.executor.ClickHouseBatchRunnable;
16+
import com.altinity.clickhouse.sink.connector.executor.ClickHouseBatchWriter;
1617
import com.altinity.clickhouse.sink.connector.model.ClickHouseStruct;
1718
import com.altinity.clickhouse.sink.connector.model.DBCredentials;
1819
import com.clickhouse.jdbc.ClickHouseConnection;
@@ -29,6 +30,7 @@
2930
import org.apache.kafka.connect.source.SourceRecord;
3031
import org.apache.logging.log4j.LogManager;
3132
import org.apache.logging.log4j.Logger;
33+
import org.checkerframework.checker.units.qual.C;
3234
import org.json.simple.JSONArray;
3335
import org.json.simple.JSONObject;
3436
import org.json.simple.parser.ParseException;
@@ -81,6 +83,8 @@ public class DebeziumChangeEventCapture {
8183
// Keep one clickhouse connection.
8284
private ClickHouseConnection conn;
8385

86+
ClickHouseBatchWriter singleThreadedWriter;
87+
8488
public DebeziumChangeEventCapture() {
8589
singleThreadDebeziumEventExecutor = Executors.newFixedThreadPool(1);
8690
}
@@ -596,7 +600,7 @@ public void handleBatch(List<ChangeEvent<SourceRecord, SourceRecord>> list,
596600

597601

598602
if(batch.size() > 0) {
599-
appendToRecords(batch);
603+
appendToRecords(batch, config);
600604
}
601605
}
602606
});
@@ -775,23 +779,33 @@ DBCredentials parseDBConfiguration(ClickHouseSinkConnectorConfig config) {
775779
*/
776780
private void setupProcessingThread(ClickHouseSinkConnectorConfig config) {
777781

778-
// Setup separate thread to read messages from shared buffer.
779-
// this.records = new ConcurrentLinkedQueue<>();
780-
//this.runnable = new ClickHouseBatchRunnable(this.records, config, new HashMap());
781-
ThreadFactory namedThreadFactory =
782-
new ThreadFactoryBuilder().setNameFormat("Sink Connector thread-pool-%d").build();
783-
this.executor = new ClickHouseBatchExecutor(config.getInt(ClickHouseSinkConnectorConfigVariables.THREAD_POOL_SIZE.toString()), namedThreadFactory);
784-
for(int i = 0; i < config.getInt(ClickHouseSinkConnectorConfigVariables.THREAD_POOL_SIZE.toString()); i++) {
785-
this.executor.scheduleAtFixedRate(new ClickHouseBatchRunnable(this.records, config, new HashMap()), 0,
786-
config.getLong(ClickHouseSinkConnectorConfigVariables.BUFFER_FLUSH_TIME.toString()), TimeUnit.MILLISECONDS);
782+
if(config.getBoolean(ClickHouseSinkConnectorConfigVariables.SINGLE_THREADED.toString())) {
783+
log.info("********* Running in Single Threaded mode *********");
784+
singleThreadedWriter = new ClickHouseBatchWriter(config, new HashMap());
787785
}
786+
787+
ThreadFactory namedThreadFactory =
788+
new ThreadFactoryBuilder().setNameFormat("Sink Connector thread-pool-%d").build();
789+
this.executor = new ClickHouseBatchExecutor(config.getInt(ClickHouseSinkConnectorConfigVariables.THREAD_POOL_SIZE.toString()), namedThreadFactory);
790+
for (int i = 0; i < config.getInt(ClickHouseSinkConnectorConfigVariables.THREAD_POOL_SIZE.toString()); i++) {
791+
this.executor.scheduleAtFixedRate(new ClickHouseBatchRunnable(this.records, config, new HashMap()), 0,
792+
config.getLong(ClickHouseSinkConnectorConfigVariables.BUFFER_FLUSH_TIME.toString()), TimeUnit.MILLISECONDS);
793+
}
794+
788795
//this.executor.scheduleAtFixedRate(this.runnable, 0, config.getLong(ClickHouseSinkConnectorConfigVariables.BUFFER_FLUSH_TIME.toString()), TimeUnit.MILLISECONDS);
789796
}
790797

791-
private void appendToRecords(List<ClickHouseStruct> convertedRecords) {
798+
private void appendToRecords(List<ClickHouseStruct> convertedRecords, ClickHouseSinkConnectorConfig config) {
792799

793-
synchronized (this.records) {
794-
this.records.add(convertedRecords);
800+
// If config is set to single threaded.
801+
if(config.getBoolean(ClickHouseSinkConnectorConfigVariables.SINGLE_THREADED.toString())) {
802+
singleThreadedWriter.persistRecords(convertedRecords);
803+
804+
} else {
805+
806+
synchronized (this.records) {
807+
this.records.add(convertedRecords);
808+
}
795809
}
796810

797811

sink-connector/src/main/java/com/altinity/clickhouse/sink/connector/ClickHouseSinkConnectorConfig.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -450,6 +450,16 @@ static ConfigDef newConfigDef() {
450450
6,
451451
ConfigDef.Width.NONE,
452452
ClickHouseSinkConnectorConfigVariables.MAX_QUEUE_SIZE.toString())
453+
.define(
454+
ClickHouseSinkConnectorConfigVariables.SINGLE_THREADED.toString(),
455+
Type.BOOLEAN,
456+
false,
457+
Importance.HIGH,
458+
"Single threaded mode",
459+
CONFIG_GROUP_CONNECTOR_CONFIG,
460+
6,
461+
ConfigDef.Width.NONE,
462+
ClickHouseSinkConnectorConfigVariables.SINGLE_THREADED.toString())
453463
.define(
454464
ClickHouseSinkConnectorConfigVariables.REPLICA_STATUS_VIEW.toString(),
455465
Type.STRING,

sink-connector/src/main/java/com/altinity/clickhouse/sink/connector/ClickHouseSinkConnectorConfigVariables.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,9 @@ public enum ClickHouseSinkConnectorConfigVariables {
7373
JDBC_PARAMETERS("clickhouse.jdbc.params"),
7474

7575
REPLICA_STATUS_VIEW("replica.status.view"),
76-
MAX_QUEUE_SIZE("sink.connector.max.queue.size");
76+
MAX_QUEUE_SIZE("sink.connector.max.queue.size"),
77+
78+
SINGLE_THREADED("single.threaded");
7779

7880
private String label;
7981

0 commit comments

Comments
 (0)