Skip to content

Commit 157fd86

Browse files
authored
fix(exports): format csv with formula and breakdown (#40607)
1 parent f48bbc1 commit 157fd86

File tree

2 files changed

+275
-13
lines changed

2 files changed

+275
-13
lines changed

posthog/tasks/exports/csv_exporter.py

Lines changed: 40 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,12 @@
2222
from ...exceptions import QuerySizeExceeded
2323
from ...hogql.constants import CSV_EXPORT_BREAKDOWN_LIMIT_INITIAL, CSV_EXPORT_BREAKDOWN_LIMIT_LOW, CSV_EXPORT_LIMIT
2424
from ...hogql.query import LimitContext
25+
from ...hogql_queries.insights.trends.breakdown import (
26+
BREAKDOWN_NULL_DISPLAY,
27+
BREAKDOWN_NULL_STRING_LABEL,
28+
BREAKDOWN_OTHER_DISPLAY,
29+
BREAKDOWN_OTHER_STRING_LABEL,
30+
)
2531
from ..exporter import EXPORT_ASSET_UNKNOWN_COUNTER, EXPORT_FAILED_COUNTER, EXPORT_SUCCEEDED_COUNTER, EXPORT_TIMER
2632
from .ordered_csv_renderer import OrderedCsvRenderer
2733

@@ -78,7 +84,7 @@ def add_query_params(url: str, params: dict[str, str]) -> str:
7884
return urlunparse(parsed)
7985

8086

81-
def _convert_response_to_csv_data(data: Any) -> Generator[Any, None, None]:
87+
def _convert_response_to_csv_data(data: Any, breakdown_filter: Optional[dict] = None) -> Generator[Any, None, None]:
8288
if isinstance(data.get("results"), list):
8389
results = data.get("results")
8490
if len(results) > 0 and (isinstance(results[0], list) or isinstance(results[0], tuple)) and data.get("types"):
@@ -166,7 +172,9 @@ def _convert_response_to_csv_data(data: Any) -> Generator[Any, None, None]:
166172

167173
yield line
168174
return
169-
elif isinstance(first_result.get("data"), list):
175+
elif isinstance(first_result.get("data"), list) or (
176+
first_result.get("data") is None and "aggregated_value" in first_result
177+
):
170178
is_comparison = first_result.get("compare_label")
171179

172180
# take date labels from current results, when comparing against previous
@@ -186,9 +194,33 @@ def _convert_response_to_csv_data(data: Any) -> Generator[Any, None, None]:
186194

187195
if isinstance(action, dict) and action.get("custom_name"):
188196
line["custom name"] = action.get("custom_name")
189-
if item.get("aggregated_value"):
190-
line["total count"] = item.get("aggregated_value")
191-
else:
197+
198+
if "breakdown_value" in item:
199+
breakdown_value = item.get("breakdown_value")
200+
breakdown_values = breakdown_value if isinstance(breakdown_value, list) else [breakdown_value]
201+
202+
# Get breakdown property names from filter
203+
breakdowns = breakdown_filter.get("breakdowns", []) if breakdown_filter else []
204+
# For single breakdown, check legacy "breakdown" field
205+
if not breakdowns and breakdown_filter and "breakdown" in breakdown_filter:
206+
breakdowns = [{"property": breakdown_filter.get("breakdown")}]
207+
208+
for idx, val in enumerate(breakdown_values):
209+
# Get the property name from the breakdown filter
210+
prop_name = breakdowns[idx].get("property") if idx < len(breakdowns) else None
211+
if not prop_name:
212+
continue
213+
# Format special breakdown values for display
214+
formatted_val = str(val) if val is not None else ""
215+
if formatted_val == BREAKDOWN_OTHER_STRING_LABEL:
216+
formatted_val = BREAKDOWN_OTHER_DISPLAY
217+
elif formatted_val == BREAKDOWN_NULL_STRING_LABEL:
218+
formatted_val = BREAKDOWN_NULL_DISPLAY
219+
line[prop_name] = formatted_val
220+
221+
if item.get("aggregated_value") is not None:
222+
line["Total Sum"] = item.get("aggregated_value")
223+
elif item.get("data"):
192224
for index, data in enumerate(item["data"]):
193225
line[label_item["labels"][index]] = data
194226

@@ -289,7 +321,9 @@ def get_from_hogql_query(exported_asset: ExportedAsset, limit: int, resource: di
289321

290322
if isinstance(query_response, BaseModel):
291323
query_response = query_response.model_dump(by_alias=True)
292-
yield from _convert_response_to_csv_data(query_response)
324+
325+
breakdown_filter = query.get("breakdownFilter") if query else None
326+
yield from _convert_response_to_csv_data(query_response, breakdown_filter=breakdown_filter)
293327
return
294328

295329

posthog/tasks/exports/test/test_csv_exporter.py

Lines changed: 235 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -807,13 +807,13 @@ def test_csv_exporter_trends_query_with_compare_previous_option(
807807
lines = (content or "").strip().splitlines()
808808

809809
expected_lines = [
810-
"series,21-Mar-2023,22-Mar-2023,23-Mar-2023,24-Mar-2023,25-Mar-2023,26-Mar-2023,27-Mar-2023,28-Mar-2023",
811-
"Chrome - current,2.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0",
812-
"Firefox - current,1.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0",
813-
"Safari - current,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0",
814-
"Chrome - previous,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0",
815-
"Firefox - previous,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0",
816-
"Safari - previous,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0",
810+
"series,$browser,21-Mar-2023,22-Mar-2023,23-Mar-2023,24-Mar-2023,25-Mar-2023,26-Mar-2023,27-Mar-2023,28-Mar-2023",
811+
"Chrome - current,Chrome,2.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0",
812+
"Firefox - current,Firefox,1.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0",
813+
"Safari - current,Safari,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0",
814+
"Chrome - previous,Chrome,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0",
815+
"Firefox - previous,Firefox,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0",
816+
"Safari - previous,Safari,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0",
817817
]
818818

819819
self.assertEqual(lines, expected_lines)
@@ -885,3 +885,231 @@ def test_csv_exporter_trends_actors(
885885
"d0780d6b-ccd0-44fa-a227-47efe4f3f30d,,,user_2,1,user_2",
886886
],
887887
)
888+
889+
# Test: CSV export of a TrendsQuery that combines two event series with the
# formula "A+B" (display "ActionsTable", no breakdownFilter). One person emits
# one "formula_test_event_a" and one "formula_test_event_b" on 2024-05-15, and
# the expected CSV is the header "series,Total Sum" plus a single
# "Formula (A+B),2.0" row.
# NOTE(review): MAX_SELECT_RETURNED_ROWS is never referenced in the body below;
# looks like a leftover parameter - confirm whether a matching @patch was intended.
# NOTE(review): the bare "NNN+" lines are diff line-number markers from the
# rendered commit view, and the original indentation is not preserved here, so
# the extent of the `with` blocks cannot be read off this text.
@patch("posthog.models.exported_asset.UUIDT")
890+
def test_csv_exporter_trends_query_with_formula(
891+
self, mocked_uuidt: Any, MAX_SELECT_RETURNED_ROWS: int = 10
892+
) -> None:
893+
with freeze_time("2024-05-15T12:00:00.000Z"):
894+
_create_person(distinct_ids=["formula_test_user_xyz"], team=self.team)
895+
896+
events_by_person = {
897+
"formula_test_user_xyz": [
898+
{"event": "formula_test_event_a", "timestamp": datetime(2024, 5, 15, 13, 46)},
899+
{"event": "formula_test_event_b", "timestamp": datetime(2024, 5, 15, 13, 47)},
900+
],
901+
}
902+
journeys_for(events_by_person, self.team)
903+
flush_persons_and_events()
904+
905+
exported_asset = ExportedAsset(
906+
team=self.team,
907+
export_format=ExportedAsset.ExportFormat.CSV,
908+
export_context={
909+
"source": {
910+
"kind": "TrendsQuery",
911+
"dateRange": {"date_to": "2024-05-15", "date_from": "2024-05-15"},
912+
"series": [
913+
{"kind": "EventsNode", "event": "formula_test_event_a", "name": "Event A", "math": "total"},
914+
{"kind": "EventsNode", "event": "formula_test_event_b", "name": "Event B", "math": "total"},
915+
],
916+
"interval": "day",
917+
"trendsFilter": {
918+
"showLegend": True,
919+
"display": "ActionsTable",
920+
"formula": "A+B",
921+
},
922+
}
923+
},
924+
)
925+
exported_asset.save()
# The patched UUIDT returns a fixed value; the content_location assertion
# below expects the stored path to end in "a-guid".
926+
mocked_uuidt.return_value = "a-guid"
927+
928+
with self.settings(OBJECT_STORAGE_ENABLED=True, OBJECT_STORAGE_EXPORTS_FOLDER="Test-Exports"):
929+
csv_exporter.export_tabular(exported_asset)
930+
931+
assert (
932+
exported_asset.content_location
933+
== f"{TEST_PREFIX}/csv/team-{self.team.id}/task-{exported_asset.id}/a-guid"
934+
)
935+
936+
content = object_storage.read(exported_asset.content_location)
# Rows are split on CRLF; `content or ""` guards against read() returning None.
937+
lines = (content or "").strip().split("\r\n")
938+
self.assertEqual(
939+
lines,
940+
[
941+
"series,Total Sum",
942+
"Formula (A+B),2.0",
943+
],
944+
)
945+
946+
# Test: CSV export of a formula ("A+B") TrendsQuery with a single breakdown
# supplied through the breakdownFilter "breakdown" key ("country",
# breakdown_type "event") rather than a "breakdowns" list. Both events carry
# country "USA", and the expected CSV has a "country" column between "series"
# and "Total Sum", with the single row "Formula (A+B),USA,2.0".
# NOTE(review): MAX_SELECT_RETURNED_ROWS is never referenced in the body below;
# looks like a leftover parameter - confirm whether a matching @patch was intended.
# NOTE(review): the bare "NNN+" lines are diff line-number markers from the
# rendered commit view, and the original indentation is not preserved here, so
# the extent of the `with` blocks cannot be read off this text.
@patch("posthog.models.exported_asset.UUIDT")
947+
def test_csv_exporter_trends_query_with_formula_and_single_breakdown(
948+
self, mocked_uuidt: Any, MAX_SELECT_RETURNED_ROWS: int = 10
949+
) -> None:
950+
with freeze_time("2024-06-10T12:00:00.000Z"):
951+
_create_person(distinct_ids=["breakdown_user_single"], team=self.team)
952+
953+
_create_event(
954+
event="breakdown_single_event_a",
955+
distinct_id="breakdown_user_single",
956+
team=self.team,
957+
timestamp=datetime(2024, 6, 10, 13, 46),
958+
properties={"country": "USA"},
959+
)
960+
_create_event(
961+
event="breakdown_single_event_b",
962+
distinct_id="breakdown_user_single",
963+
team=self.team,
964+
timestamp=datetime(2024, 6, 10, 13, 47),
965+
properties={"country": "USA"},
966+
)
967+
flush_persons_and_events()
968+
969+
exported_asset = ExportedAsset(
970+
team=self.team,
971+
export_format=ExportedAsset.ExportFormat.CSV,
972+
export_context={
973+
"source": {
974+
"kind": "TrendsQuery",
975+
"dateRange": {"date_to": "2024-06-10", "date_from": "2024-06-10"},
976+
"series": [
977+
{
978+
"kind": "EventsNode",
979+
"event": "breakdown_single_event_a",
980+
"name": "Event A",
981+
"math": "total",
982+
},
983+
{
984+
"kind": "EventsNode",
985+
"event": "breakdown_single_event_b",
986+
"name": "Event B",
987+
"math": "total",
988+
},
989+
],
990+
"interval": "day",
991+
"trendsFilter": {
992+
"showLegend": True,
993+
"display": "ActionsTable",
994+
"formula": "A+B",
995+
},
996+
"breakdownFilter": {
997+
"breakdown": "country",
998+
"breakdown_type": "event",
999+
},
1000+
}
1001+
},
1002+
)
1003+
exported_asset.save()
# The patched UUIDT returns a fixed value; the content_location assertion
# below expects the stored path to end in "a-guid".
1004+
mocked_uuidt.return_value = "a-guid"
1005+
1006+
with self.settings(OBJECT_STORAGE_ENABLED=True, OBJECT_STORAGE_EXPORTS_FOLDER="Test-Exports"):
1007+
csv_exporter.export_tabular(exported_asset)
1008+
1009+
assert (
1010+
exported_asset.content_location
1011+
== f"{TEST_PREFIX}/csv/team-{self.team.id}/task-{exported_asset.id}/a-guid"
1012+
)
1013+
1014+
content = object_storage.read(exported_asset.content_location)
# Rows are split on CRLF; `content or ""` guards against read() returning None.
1015+
lines = (content or "").strip().split("\r\n")
1016+
self.assertEqual(
1017+
lines,
1018+
[
1019+
"series,country,Total Sum",
1020+
"Formula (A+B),USA,2.0",
1021+
],
1022+
)
1023+
1024+
# Test: CSV export of a formula ("A+B") TrendsQuery with two breakdowns supplied
# via the breakdownFilter "breakdowns" list: distinct_id (type "event_metadata")
# and $browser (type "event"). multi_breakdown_user_1 emits event a and event b
# with $browser "Chrome"; multi_breakdown_user_2 emits only event a with
# $browser "Firefox". The expected CSV has one column per breakdown property
# and one row per (distinct_id, $browser) pair, with totals 2.0 and 1.0.
# NOTE(review): MAX_SELECT_RETURNED_ROWS is never referenced in the body below;
# looks like a leftover parameter - confirm whether a matching @patch was intended.
# NOTE(review): the bare "NNN+" lines are diff line-number markers from the
# rendered commit view, and the original indentation is not preserved here, so
# the extent of the `with` blocks cannot be read off this text.
@patch("posthog.models.exported_asset.UUIDT")
1025+
def test_csv_exporter_trends_query_with_formula_and_multiple_breakdowns(
1026+
self, mocked_uuidt: Any, MAX_SELECT_RETURNED_ROWS: int = 10
1027+
) -> None:
1028+
with freeze_time("2024-07-20T12:00:00.000Z"):
1029+
_create_person(distinct_ids=["multi_breakdown_user_1"], team=self.team)
1030+
_create_person(distinct_ids=["multi_breakdown_user_2"], team=self.team)
1031+
1032+
_create_event(
1033+
event="multi_breakdown_event_a",
1034+
distinct_id="multi_breakdown_user_1",
1035+
team=self.team,
1036+
timestamp=datetime(2024, 7, 20, 13, 46),
1037+
properties={"$browser": "Chrome"},
1038+
)
1039+
_create_event(
1040+
event="multi_breakdown_event_b",
1041+
distinct_id="multi_breakdown_user_1",
1042+
team=self.team,
1043+
timestamp=datetime(2024, 7, 20, 13, 47),
1044+
properties={"$browser": "Chrome"},
1045+
)
1046+
_create_event(
1047+
event="multi_breakdown_event_a",
1048+
distinct_id="multi_breakdown_user_2",
1049+
team=self.team,
1050+
timestamp=datetime(2024, 7, 20, 13, 48),
1051+
properties={"$browser": "Firefox"},
1052+
)
1053+
flush_persons_and_events()
1054+
1055+
exported_asset = ExportedAsset(
1056+
team=self.team,
1057+
export_format=ExportedAsset.ExportFormat.CSV,
1058+
export_context={
1059+
"source": {
1060+
"kind": "TrendsQuery",
1061+
"dateRange": {"date_to": "2024-07-20", "date_from": "2024-07-20"},
1062+
"series": [
1063+
{
1064+
"kind": "EventsNode",
1065+
"event": "multi_breakdown_event_a",
1066+
"name": "Event A",
1067+
"math": "total",
1068+
},
1069+
{
1070+
"kind": "EventsNode",
1071+
"event": "multi_breakdown_event_b",
1072+
"name": "Event B",
1073+
"math": "total",
1074+
},
1075+
],
1076+
"interval": "day",
1077+
"trendsFilter": {
1078+
"showLegend": True,
1079+
"display": "ActionsTable",
1080+
"formula": "A+B",
1081+
},
1082+
"breakdownFilter": {
1083+
"breakdowns": [
1084+
{"property": "distinct_id", "type": "event_metadata"},
1085+
{"property": "$browser", "type": "event"},
1086+
]
1087+
},
1088+
}
1089+
},
1090+
)
1091+
exported_asset.save()
# The patched UUIDT returns a fixed value; the content_location assertion
# below expects the stored path to end in "a-guid".
1092+
mocked_uuidt.return_value = "a-guid"
1093+
1094+
with self.settings(OBJECT_STORAGE_ENABLED=True, OBJECT_STORAGE_EXPORTS_FOLDER="Test-Exports"):
1095+
csv_exporter.export_tabular(exported_asset)
1096+
1097+
assert (
1098+
exported_asset.content_location
1099+
== f"{TEST_PREFIX}/csv/team-{self.team.id}/task-{exported_asset.id}/a-guid"
1100+
)
1101+
1102+
content = object_storage.read(exported_asset.content_location)
# Rows are split on CRLF; `content or ""` guards against read() returning None.
1103+
lines = (content or "").strip().split("\r\n")
1104+
1105+
# Sort data lines for consistent comparison (order may vary)
1106+
data_lines = sorted(lines[1:])
1107+
# The header row (lines[0:1]) keeps its position; only the data rows are sorted.
1108+
self.assertEqual(
1109+
lines[0:1] + data_lines,
1110+
[
1111+
"series,distinct_id,$browser,Total Sum",
1112+
"Formula (A+B),multi_breakdown_user_1,Chrome,2.0",
1113+
"Formula (A+B),multi_breakdown_user_2,Firefox,1.0",
1114+
],
1115+
)

0 commit comments

Comments
 (0)