Skip to content

Commit 5d93835

Browse files
authored
Api v2 improvements (#6164)
* API v2: fix returning buckets outside of the queried range
  * For time:hour and time:minute, sessions are smeared using time_slots. The fix is to filter out time_slots that fall outside of the UTC boundaries.
  * For any other time dimension, there is no session smearing, but since sessions are put into time buckets by their last event timestamp, the query might return buckets that are outside of the query time range. The fix is to clamp those sessions into the last bucket instead.
* Allow time dimensions when querying views_per_visit
* Update changelog
* Stop generating bad time slots in the first place, instead of filtering them out afterwards
1 parent 85b39ee commit 5d93835

File tree

9 files changed

+328
-15
lines changed

9 files changed

+328
-15
lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ All notable changes to this project will be documented in this file.
66

77
### Added
88

9+
- Allow querying `views_per_visit` with a time dimension in Stats API
910
- Add `bounce_rate` to page-filtered Top Stats even when imports are included, but render a warning in the `bounce_rate` tooltip that imported data is not included in the metric.
1011
- Add `time_on_page` to page-filtered Top Stats even when imports are included, unless legacy time on page is in view.
1112
- Add `team_id` to query debug metadata (saved in the `log_comment` column of `system.query_log`)
@@ -21,6 +22,7 @@ All notable changes to this project will be documented in this file.
2122

2223
### Fixed
2324

25+
- Fixed Stats API timeseries returning time buckets falling outside the queried range
2426
- Fixed issue with all non-interactive events being counted as interactive
2527
- Fixed countries map countries staying highlighted on Chrome
2628

lib/plausible/stats/query_builder.ex

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -514,11 +514,11 @@ defmodule Plausible.Stats.QueryBuilder do
514514
message: "Metric `#{metric}` cannot be queried with a filter on `event:page`."
515515
}}
516516

517-
length(query.dimensions) > 0 ->
517+
Enum.any?(query.dimensions, &(not Time.time_dimension?(&1))) ->
518518
{:error,
519519
%QueryError{
520520
code: :invalid_metrics,
521-
message: "Metric `#{metric}` cannot be queried with `dimensions`."
521+
message: "Metric `#{metric}` cannot be queried with non-time dimensions."
522522
}}
523523

524524
true ->

lib/plausible/stats/sql/expression.ex

Lines changed: 59 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ defmodule Plausible.Stats.SQL.Expression do
1212

1313
import Ecto.Query
1414

15-
alias Plausible.Stats.{Query, Filters, SQL}
15+
alias Plausible.Stats.{Query, Filters, SQL, Time}
1616

1717
@no_ref "Direct / None"
1818
@no_channel "Direct"
@@ -27,25 +27,61 @@ defmodule Plausible.Stats.SQL.Expression do
2727
end
2828
end
2929

30-
defmacrop time_slots(query, period_in_seconds) do
30+
defmacrop time_slots(query, period_in_seconds, first, last) do
3131
quote do
3232
fragment(
33-
"timeSlots(toTimeZone(?, ?), toUInt32(timeDiff(?, ?)), toUInt32(?))",
33+
"""
34+
timeSlots(
35+
toTimeZone(greatest(?, ?), ?),
36+
toUInt32(timeDiff(greatest(?, ?), least(?, ?))),
37+
toUInt32(?)
38+
)
39+
""",
3440
s.start,
41+
^unquote(first),
3542
^unquote(query).timezone,
3643
s.start,
44+
^unquote(first),
3745
s.timestamp,
46+
^unquote(last),
3847
^unquote(period_in_seconds)
3948
)
4049
end
4150
end
4251

52+
def select_dimension(q, key, "time:month", :sessions, query) do
53+
{_first, last_datetime} = Time.utc_boundaries(query)
54+
55+
select_merge_as(q, [t], %{
56+
key =>
57+
fragment(
58+
"toStartOfMonth(toTimeZone(least(?, ?), ?))",
59+
t.timestamp,
60+
^last_datetime,
61+
^query.timezone
62+
)
63+
})
64+
end
65+
4366
def select_dimension(q, key, "time:month", _table, query) do
4467
select_merge_as(q, [t], %{
4568
key => fragment("toStartOfMonth(toTimeZone(?, ?))", t.timestamp, ^query.timezone)
4669
})
4770
end
4871

72+
def select_dimension(q, key, "time:week", :sessions, query) do
73+
{_first, last_datetime} = Time.utc_boundaries(query)
74+
date_range = Query.date_range(query)
75+
76+
select_merge_as(q, [t], %{
77+
key =>
78+
weekstart_not_before(
79+
to_timezone(fragment("least(?, ?)", t.timestamp, ^last_datetime), ^query.timezone),
80+
^date_range.first
81+
)
82+
})
83+
end
84+
4985
def select_dimension(q, key, "time:week", _table, query) do
5086
date_range = Query.date_range(query)
5187

@@ -58,6 +94,20 @@ defmodule Plausible.Stats.SQL.Expression do
5894
})
5995
end
6096

97+
def select_dimension(q, key, "time:day", :sessions, query) do
98+
{_first, last_datetime} = Time.utc_boundaries(query)
99+
100+
select_merge_as(q, [t], %{
101+
key =>
102+
fragment(
103+
"toDate(toTimeZone(least(?, ?), ?))",
104+
t.timestamp,
105+
^last_datetime,
106+
^query.timezone
107+
)
108+
})
109+
end
110+
61111
def select_dimension(q, key, "time:day", _table, query) do
62112
select_merge_as(q, [t], %{
63113
key => fragment("toDate(toTimeZone(?, ?))", t.timestamp, ^query.timezone)
@@ -69,8 +119,10 @@ defmodule Plausible.Stats.SQL.Expression do
69119
# timezone-aware. This means that for e.g. Asia/Katmandu (GMT+5:45)
70120
# to work, we divide time into 15-minute buckets and later combine these
71121
# via toStartOfHour
122+
{first, last} = Time.utc_boundaries(query)
123+
72124
q
73-
|> join(:inner, [s], time_slot in time_slots(query, 15 * 60),
125+
|> join(:inner, [s], time_slot in time_slots(query, 15 * 60, first, last),
74126
as: :time_slot,
75127
hints: "ARRAY",
76128
on: true
@@ -89,8 +141,10 @@ defmodule Plausible.Stats.SQL.Expression do
89141
# :NOTE: This is not exposed in Query APIv2
90142
def select_dimension(q, key, "time:minute", :sessions, query)
91143
when query.smear_session_metrics do
144+
{first, last} = Time.utc_boundaries(query)
145+
92146
q
93-
|> join(:inner, [s], time_slot in time_slots(query, 60),
147+
|> join(:inner, [s], time_slot in time_slots(query, 60, first, last),
94148
as: :time_slot,
95149
hints: "ARRAY",
96150
on: true

test/plausible/stats/query/query_parse_and_build_test.exs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2039,7 +2039,7 @@ defmodule Plausible.Stats.Query.QueryParseAndBuildTest do
20392039
assert error == "Metric `views_per_visit` cannot be queried with a filter on `event:page`."
20402040
end
20412041

2042-
test "fails validation with dimensions", %{site: site} do
2042+
test "fails validation with non-time dimensions", %{site: site} do
20432043
params = %{
20442044
"site_id" => site.domain,
20452045
"metrics" => ["views_per_visit"],
@@ -2050,7 +2050,7 @@ defmodule Plausible.Stats.Query.QueryParseAndBuildTest do
20502050
assert {:error, %QueryError{message: error}} =
20512051
Query.parse_and_build(site, params, now: @now)
20522052

2053-
assert error == "Metric `views_per_visit` cannot be queried with `dimensions`."
2053+
assert error == "Metric `views_per_visit` cannot be queried with non-time dimensions."
20542054
end
20552055
end
20562056

test/plausible/stats/query/query_test.exs

Lines changed: 192 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,4 +125,196 @@ defmodule Plausible.Stats.QueryTest do
125125
]
126126
end
127127
end
128+
129+
describe "session smearing respects query date range boundaries" do
130+
test "time:hour does not include buckets from outside the query range",
131+
%{site: site} do
132+
populate_stats(site, [
133+
build(:pageview, user_id: 1, timestamp: ~N[2021-01-01 23:55:00]),
134+
build(:pageview, user_id: 1, timestamp: ~N[2021-01-02 00:10:00]),
135+
build(:pageview, user_id: 2, timestamp: ~N[2021-01-02 23:55:00]),
136+
build(:pageview, user_id: 2, timestamp: ~N[2021-01-03 00:10:00])
137+
])
138+
139+
{:ok, query} =
140+
QueryBuilder.build(site, %ParsedQueryParams{
141+
metrics: [:visitors],
142+
input_date_range: {:date_range, ~D[2021-01-02], ~D[2021-01-02]},
143+
dimensions: ["time:hour"],
144+
include: %QueryInclude{total_rows: true}
145+
})
146+
147+
%Stats.QueryResult{results: results, meta: meta} = Stats.query(site, query)
148+
149+
assert results == [
150+
%{dimensions: ["2021-01-02 00:00:00"], metrics: [1]},
151+
%{dimensions: ["2021-01-02 23:00:00"], metrics: [1]}
152+
]
153+
154+
assert meta[:total_rows] == 2
155+
end
156+
157+
test "time:hour does not include buckets from outside the query range (non-UTC timezone)",
158+
%{site: site} do
159+
# America/New_York is UTC-5 in January
160+
site = %{site | timezone: "America/New_York"}
161+
162+
populate_stats(site, [
163+
# 2020-12-31 23:55 in NYC (outside of query range)
164+
build(:pageview, user_id: 1, timestamp: ~N[2021-01-01 04:55:00]),
165+
# 2021-01-01 00:10 in NYC (in query range)
166+
build(:pageview, user_id: 1, timestamp: ~N[2021-01-01 05:10:00]),
167+
# 2021-01-01 23:55 in NYC (in query range)
168+
build(:pageview, user_id: 1, timestamp: ~N[2021-01-02 04:55:00]),
169+
# 2021-01-02 00:10 in NYC (outside of query range)
170+
build(:pageview, user_id: 1, timestamp: ~N[2021-01-02 05:10:00])
171+
])
172+
173+
{:ok, query} =
174+
QueryBuilder.build(site, %ParsedQueryParams{
175+
metrics: [:visitors],
176+
input_date_range: {:date_range, ~D[2021-01-01], ~D[2021-01-01]},
177+
dimensions: ["time:hour"]
178+
})
179+
180+
%Stats.QueryResult{results: results} = Stats.query(site, query)
181+
182+
assert results == [
183+
%{dimensions: ["2021-01-01 00:00:00"], metrics: [1]},
184+
%{dimensions: ["2021-01-01 23:00:00"], metrics: [1]}
185+
]
186+
end
187+
188+
test "time:minute does not include buckets from outside the query range",
189+
%{site: site} do
190+
populate_stats(site, [
191+
build(:pageview, user_id: 1, timestamp: ~N[2021-01-01 00:05:00]),
192+
build(:pageview, user_id: 1, timestamp: ~N[2021-01-01 00:20:00]),
193+
build(:pageview, user_id: 2, timestamp: ~N[2021-01-01 00:08:00]),
194+
build(:pageview, user_id: 2, timestamp: ~N[2021-01-01 00:10:00])
195+
])
196+
197+
{:ok, query} =
198+
QueryBuilder.build(site, %ParsedQueryParams{
199+
metrics: [:visitors],
200+
input_date_range: {:datetime_range, ~U[2021-01-01 00:08:00Z], ~U[2021-01-01 00:12:00Z]},
201+
dimensions: ["time:minute"],
202+
include: %QueryInclude{total_rows: true}
203+
})
204+
205+
%Stats.QueryResult{results: results, meta: meta} = Stats.query(site, query)
206+
207+
assert results == [
208+
%{dimensions: ["2021-01-01 00:08:00"], metrics: [2]},
209+
%{dimensions: ["2021-01-01 00:09:00"], metrics: [2]},
210+
%{dimensions: ["2021-01-01 00:10:00"], metrics: [2]},
211+
%{dimensions: ["2021-01-01 00:11:00"], metrics: [1]},
212+
%{dimensions: ["2021-01-01 00:12:00"], metrics: [1]}
213+
]
214+
215+
assert meta[:total_rows] == 5
216+
end
217+
218+
test "time:minute does not include buckets from outside the query range (non-UTC timezone)",
219+
%{site: site} do
220+
# America/New_York is UTC-5 in January
221+
site = %{site | timezone: "America/New_York"}
222+
223+
populate_stats(site, [
224+
# 2020-12-31 23:59:00 in NYC (outside of queried range)
225+
build(:pageview, user_id: 1, timestamp: ~N[2021-01-01 04:59:00]),
226+
# 2021-01-01 00:02:00 in NYC (in queried range)
227+
build(:pageview, user_id: 1, timestamp: ~N[2021-01-01 05:02:00]),
228+
# 2021-01-01 23:59:00 in NYC (in queried range)
229+
build(:pageview, user_id: 2, timestamp: ~N[2021-01-02 04:59:00]),
230+
# 2021-01-02 00:01:00 in NYC (outside of queried range)
231+
build(:pageview, user_id: 2, timestamp: ~N[2021-01-02 05:01:00])
232+
])
233+
234+
{:ok, query} =
235+
QueryBuilder.build(site, %ParsedQueryParams{
236+
metrics: [:visitors],
237+
input_date_range: :day,
238+
relative_date: ~D[2021-01-01],
239+
dimensions: ["time:minute"]
240+
})
241+
242+
%Stats.QueryResult{results: results} = Stats.query(site, query)
243+
244+
assert results == [
245+
%{dimensions: ["2021-01-01 00:00:00"], metrics: [1]},
246+
%{dimensions: ["2021-01-01 00:01:00"], metrics: [1]},
247+
%{dimensions: ["2021-01-01 00:02:00"], metrics: [1]},
248+
%{dimensions: ["2021-01-01 23:59:00"], metrics: [1]}
249+
]
250+
end
251+
252+
test "time:day clamps sessions extending past the query range end into the last bucket",
253+
%{site: site} do
254+
populate_stats(site, [
255+
build(:pageview, user_id: 1, timestamp: ~N[2021-01-31 23:55:00]),
256+
build(:pageview, user_id: 1, timestamp: ~N[2021-02-01 00:05:00])
257+
])
258+
259+
{:ok, query} =
260+
QueryBuilder.build(site, %ParsedQueryParams{
261+
metrics: [:visitors],
262+
input_date_range: {:date_range, ~D[2021-01-01], ~D[2021-01-31]},
263+
dimensions: ["time:day"]
264+
})
265+
266+
%Stats.QueryResult{results: results} = Stats.query(site, query)
267+
268+
# Without clamping the session would bucket to "2021-02-01" (outside range)
269+
assert results == [
270+
%{dimensions: ["2021-01-31"], metrics: [1]}
271+
]
272+
end
273+
274+
test "time:week clamps sessions extending past the query range end into the last bucket",
275+
%{site: site} do
276+
populate_stats(site, [
277+
build(:pageview, user_id: 1, timestamp: ~N[2021-01-31 23:55:00]),
278+
build(:pageview, user_id: 1, timestamp: ~N[2021-02-01 00:05:00])
279+
])
280+
281+
{:ok, query} =
282+
QueryBuilder.build(site, %ParsedQueryParams{
283+
metrics: [:visitors],
284+
input_date_range: {:date_range, ~D[2021-01-01], ~D[2021-01-31]},
285+
dimensions: ["time:week"]
286+
})
287+
288+
%Stats.QueryResult{results: results} = Stats.query(site, query)
289+
290+
# Without clamping the session would bucket to "2021-02-01" (outside range).
291+
# Clamped to Jan 31 23:59:59 -> toMonday(Jan 31) = Jan 25.
292+
assert results == [
293+
%{dimensions: ["2021-01-25"], metrics: [1]}
294+
]
295+
end
296+
297+
test "time:month clamps sessions extending past the query range end into the last bucket",
298+
%{site: site} do
299+
populate_stats(site, [
300+
build(:pageview, user_id: 1, timestamp: ~N[2021-02-28 23:55:00]),
301+
build(:pageview, user_id: 1, timestamp: ~N[2021-03-01 00:05:00])
302+
])
303+
304+
{:ok, query} =
305+
QueryBuilder.build(site, %ParsedQueryParams{
306+
metrics: [:visitors],
307+
input_date_range: {:date_range, ~D[2021-01-01], ~D[2021-02-28]},
308+
dimensions: ["time:month"]
309+
})
310+
311+
%Stats.QueryResult{results: results} = Stats.query(site, query)
312+
313+
# Without clamping the session would bucket to "2021-03-01" (outside range).
314+
# Clamped to Feb 28 23:59:59 -> toStartOfMonth -> Feb 1.
315+
assert results == [
316+
%{dimensions: ["2021-02-01"], metrics: [1]}
317+
]
318+
end
319+
end
128320
end

0 commit comments

Comments
 (0)