|
10 | 10 | INGEST_FLUSH_TIMEOUT_SEC, |
11 | 11 | INGEST_QUEUE_MAXSIZE, |
12 | 12 | VERBOSE, |
| 13 | + PRIO_DB, |
13 | 14 | ) |
14 | 15 | import threading |
15 | 16 | import time |
|
21 | 22 | extract_log_excerpt, |
22 | 23 | ) |
23 | 24 | import kcidb_io |
24 | | -from django.db import transaction |
| 25 | +from django.db import connections, transaction |
25 | 26 | from kernelCI_app.models import Issues, Checkouts, Builds, Tests, Incidents |
26 | 27 |
|
27 | 28 | from kernelCI_app.management.commands.helpers.process_submissions import ( |
@@ -111,23 +112,85 @@ def prepare_file_data( |
111 | 112 | } |
112 | 113 |
|
113 | 114 |
|
def _build_upsert_query(model, table_name: str) -> tuple[str, list[str]]:
    """
    Build the ``INSERT ... ON CONFLICT (id) DO UPDATE`` statement for a model.

    Every non-generated concrete field of the model is written. On conflict,
    the ``_timestamp`` column keeps the larger of the stored and incoming
    values (``GREATEST``); every other column keeps the first non-NULL value
    (``COALESCE``). ``PRIO_DB`` decides which side is listed first: the value
    already stored when truthy, the incoming (``EXCLUDED``) value otherwise.

    Returns:
        A ``(query, attnames)`` pair: the SQL text with one ``%s`` placeholder
        per column, and the model attribute names (in the same order as the
        columns) whose values fill those placeholders.
    """
    attnames: list[str] = []
    columns: list[str] = []
    conflict_clauses: list[str] = []

    for field in model._meta.fields:
        # Generated columns are computed by the database and cannot be written.
        if field.generated:
            continue

        # attname is the instance attribute holding the raw value ("<name>_id"
        # for relations); column is the database column (db_column or attname).
        column = field.column
        attnames.append(field.attname)
        columns.append(column)

        operation = "GREATEST" if column == "_timestamp" else "COALESCE"
        stored = f"{table_name}.{column}"
        incoming = f"EXCLUDED.{column}"
        first, second = (stored, incoming) if PRIO_DB else (incoming, stored)
        conflict_clauses.append(f"{column} = {operation}({first}, {second})")

    # Identifiers come from model metadata and the caller-supplied table name,
    # never from submitted data; all row values are passed as parameters.
    placeholders = ", ".join(["%s"] * len(columns))
    query = (
        f"INSERT INTO {table_name} ({', '.join(columns)}) "
        f"VALUES ({placeholders}) "
        f"ON CONFLICT (id) DO UPDATE SET {', '.join(conflict_clauses)};"
    )
    return query, attnames


def _to_db_param(value):
    """Serialize dict/list values to a JSON string; pass anything else through."""
    if isinstance(value, (dict, list)):
        return json.dumps(value)
    return value


def consume_buffer(buffer: list[TableModels], table_name: TableNames) -> None:
    """
    Consume a buffer of items and upsert them into the database.
    This function is called by the db_worker thread.

    Each item is inserted into ``table_name``; when a row with the same ``id``
    already exists, its columns are merged with the incoming values instead
    (see ``_build_upsert_query`` for the merge rules). An empty buffer is a
    no-op.

    Args:
        buffer: model instances to write; all must be instances of the model
            registered for ``table_name`` in ``MODEL_MAP``.
        table_name: key into ``MODEL_MAP``; also used as the SQL table name.
    """
    if not buffer:
        return

    model = MODEL_MAP[table_name]
    query, attnames = _build_upsert_query(model, table_name)

    params = [
        tuple(_to_db_param(getattr(obj, name)) for name in attnames)
        for obj in buffer
    ]

    t0 = time.time()
    # Run the whole batch in one transaction so a failure part-way through
    # does not leave a partially written buffer (bulk_create, which this
    # replaces, was atomic as well).
    with transaction.atomic(using="default"):
        with connections["default"].cursor() as cursor:
            cursor.executemany(query, params)

    # The "bulk_create" label is kept unchanged from the previous implementation.
    out("bulk_create %s: n=%d in %.3fs" % (table_name, len(buffer), time.time() - t0))
131 | 194 |
|
132 | 195 |
|
133 | 196 | def flush_buffers( |
|
0 commit comments