Skip to content

Commit a34c5c3

Browse files
authored
Merge pull request #485 from mapswipe/reduce-tasks-table-size
Reduce tasks table size
2 parents 0ec1250 + d5618c0 commit a34c5c3

File tree

2 files changed

+136
-2
lines changed

2 files changed

+136
-2
lines changed

mapswipe_workers/mapswipe_workers/project_types/base/project.py

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -502,8 +502,19 @@ def create_groups_txt_file(self, groups):
502502
"project_type_specifics": dict(),
503503
}
504504

505+
# these common attributes don't need to be written
506+
# to the project_type_specifics since they are
507+
# already stored in separate columns
508+
# NOTE: every entry needs its own trailing comma; adjacent string
# literals are silently concatenated by Python, which would leave both
# "finishedCount" and "progress" unfiltered.
common_attributes = [
    "projectId",
    "groupId",
    "numberOfTasks",
    "requiredCount",
    "finishedCount",
    "progress",
]
515+
505516
for key in group.keys():
506-
if key not in output_dict.keys():
517+
if key not in common_attributes:
507518
output_dict["project_type_specifics"][key] = group[key]
508519
output_dict["project_type_specifics"] = json.dumps(
509520
output_dict["project_type_specifics"]
@@ -570,8 +581,14 @@ def create_tasks_txt_file(self, groupsOfTasks):
570581
"geom": task["geometry"],
571582
"project_type_specifics": dict(),
572583
}
584+
585+
# these common attributes don't need to be written
586+
# to the project_type_specifics since they are
587+
# already stored in separate columns
588+
common_attributes = ["projectId", "groupId", "taskId", "geometry"]
589+
573590
for key in task.keys():
574-
if key not in output_dict.keys():
591+
if key not in common_attributes:
575592
output_dict["project_type_specifics"][key] = task[key]
576593
output_dict["project_type_specifics"] = json.dumps(
577594
output_dict["project_type_specifics"]
Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
from mapswipe_workers import auth
2+
from mapswipe_workers.definitions import logger, sentry
3+
4+
5+
def get_project_ids_from_postgres():
    """Return the ids of all rows in the postgres ``projects`` table.

    Returns:
        list: the first column (``project_id``) of every row returned
        by the query.
    """
    query = """
        SELECT project_id FROM projects;
    """
    connection = auth.postgresDB()
    rows = connection.retr_query(query)

    # each row is indexable; the selected project_id is its first element
    project_ids = [row[0] for row in rows]

    logger.info("Got projects from postgres.")
    logger.info(project_ids)
    return project_ids
19+
20+
21+
def update_tasks_table(project_id: str):
    """Strip redundant keys from 'project_type_specifics' in the tasks table.

    The keys projectId, groupId, taskId, geometry and wkt are removed from
    the JSON value of every task row belonging to the given project.

    Args:
        project_id: id of the project whose task rows are updated.

    Any exception raised by the update is reported to sentry and logged;
    it is not re-raised.
    """
    logger.info(f"Start process for project: '{project_id}'")

    query = """
        UPDATE tasks
        SET project_type_specifics = project_type_specifics::jsonb
            #- '{projectId}'
            #- '{groupId}'
            #- '{taskId}'
            #- '{geometry}'
            #- '{wkt}'
        WHERE project_id = %(project_id)s
    """
    query_params = {"project_id": project_id}
    connection = auth.postgresDB()

    try:
        connection.query(query, query_params)
    except Exception as error:
        failure_message = f"Could NOT update tasks table for project '{project_id}'."
        sentry.capture_exception(error)
        sentry.capture_message(failure_message)
        logger.exception(error)
        logger.warning(failure_message)
    else:
        logger.info(f"Updated tasks table for project '{project_id}'.")
47+
48+
49+
def update_groups_table(project_id: str):
    """Remove duplicates in 'project_type_specifics' attribute in groups table.

    The keys projectId, id, requiredCount, finishedCount, neededCount,
    reportCount and distributedCount are removed from the JSON value of
    every group row belonging to the given project.

    Args:
        project_id: id of the project whose group rows are updated.

    Any exception raised by the update is reported to sentry and logged;
    it is not re-raised.
    """

    logger.info(f"Start process for project: '{project_id}'")
    p_con = auth.postgresDB()

    query = """
        UPDATE groups
        SET project_type_specifics = project_type_specifics::jsonb
            #- '{projectId}'
            #- '{id}'
            #- '{requiredCount}'
            #- '{finishedCount}'
            #- '{neededCount}'
            #- '{reportCount}'
            #- '{distributedCount}'
        WHERE project_id = %(project_id)s
    """
    try:
        p_con.query(query, {"project_id": project_id})
        # messages name the groups table so logs can be told apart from
        # the ones emitted by the tasks table update
        logger.info(f"Updated groups table for project '{project_id}'.")
    except Exception as e:
        sentry.capture_exception(e)
        sentry.capture_message(
            f"Could NOT update groups table for project '{project_id}'."
        )
        logger.exception(e)
        logger.warning(f"Could NOT update groups table for project '{project_id}'.")
77+
78+
79+
def run_vacuum_tasks_table():
    """Run vacuum on the tasks table to reclaim storage.

    The connection's isolation level is switched to 0 for the duration of
    the VACUUM and restored afterwards, even if the query raises.
    """
    logger.info("Start vacuum on tasks table.")
    p_con = auth.postgresDB()
    db_connection = p_con._db_connection
    # isolation_level 0 will move you out of a transaction block
    old_isolation_level = db_connection.isolation_level
    db_connection.set_isolation_level(0)
    query = """
        VACUUM tasks
    """
    try:
        p_con.query(query)
    finally:
        # set isolation_level back to initial value, also on failure
        db_connection.set_isolation_level(old_isolation_level)
    logger.info("Finish vacuum on tasks table.")
93+
94+
95+
def run_vacuum_groups_table():
    """Run vacuum on the groups table to reclaim storage.

    The connection's isolation level is switched to 0 for the duration of
    the VACUUM and restored afterwards, even if the query raises.
    """
    logger.info("Start vacuum on groups table.")
    p_con = auth.postgresDB()
    db_connection = p_con._db_connection
    # isolation_level 0 will move you out of a transaction block
    old_isolation_level = db_connection.isolation_level
    db_connection.set_isolation_level(0)
    query = """
        VACUUM groups
    """
    try:
        p_con.query(query)
    finally:
        # set isolation_level back to initial value, also on failure
        db_connection.set_isolation_level(old_isolation_level)
    logger.info("Finish vacuum on groups table.")
109+
110+
111+
if __name__ == "__main__":
    # Clean up the tasks and groups rows of every project.
    project_ids_list = get_project_ids_from_postgres()
    for project_id in project_ids_list:
        update_tasks_table(project_id)
        update_groups_table(project_id)
    # NOTE(review): vacuum is assumed to run once after all projects have
    # been updated (original indentation was not visible) - confirm.
    run_vacuum_tasks_table()
    run_vacuum_groups_table()

0 commit comments

Comments
 (0)