我必须建立一个SQL查询,最好是SQLlite语法,它每天计算活动用户的绝对数量,以及活动用户在所有用户中所占的百分比。活动用户定义为某一天X活动的用户,如果该用户在时间间隔[X-6天,X]。结果应遵循以下模式每天1行3列(日期、活动用户数、活动用户百分比)
我们有以下模式:
-- Tables
-- tracks dimension
CREATE TABLE IF NOT EXISTS tracks(
recording_msid TEXT NOT NULL,
track_name TEXT NOT NULL,
release_msid TEXT NOT NULL,
release_name TEXT NOT NULL,
release_mbid TEXT DEFAULT NULL,
recording_mbid TEXT DEFAULT NULL,
isrc TEXT DEFAULT NULL,
track_number TEXT DEFAULT NULL,
track_mbid TEXT DEFAULT NULL,
work_mbids TEXT DEFAULT NULL,
dedup_tag TEXT DEFAULT NULL,
tags TEXT DEFAULT NULL,
release_group_mbid TEXT DEFAULT NULL,
artist_names TEXT DEFAULT NULL,
discnumber INT DEFAULT NULL,
release_artist_name TEXT DEFAULT NULL,
release_artist_names TEXT DEFAULT NULL,
rating INT DEFAULT NULL,
source TEXT DEFAULT NULL,
track_length INT DEFAULT NULL,
albumartist TEXT DEFAULT NULL,
totaldiscs INT DEFAULT NULL,
totaltracks INT DEFAULT NULL,
choosen_by_user TEXT DEFAULT NULL,
PRIMARY KEY (recording_msid,track_name)
);
-- users dimension
CREATE TABLE IF NOT EXISTS users(
user_name TEXT PRIMARY KEY,
spotify_id TEXT DEFAULT NULL,
spotify_album_artist_ids TEXT DEFAULT NULL,
spotify_album_id INTEGER DEFAULT NULL,
spotify_artist_ids TEXT DEFAULT NULL
);
-- artists dimension
CREATE TABLE IF NOT EXISTS artists(
artist_msid TEXT PRIMARY KEY,
artist_name TEXT NOT NULL,
artist_mbids TEXT DEFAULT NULL
);
-- fact table user history
CREATE TABLE IF NOT EXISTS user_history(
listened_at NUMERIC NOT NULL,
track_name TEXT NOT NULL,
recording_msid TEXT NOT NULL,
user_name TEXT NOT NULL,
artist_msid TEXT NOT NULL,
duration_ms INT DEFAULT NULL,
duration INT DEFAULT NULL,
listening_from NUMERIC DEFAULT NULL,
"date" NUMERIC DEFAULT NULL,
FOREIGN KEY(track_name) REFERENCES tracks(track_name),
FOREIGN KEY(recording_msid) REFERENCES tracks(recording_msid),
FOREIGN KEY(user_name) REFERENCES users(user_name),
FOREIGN KEY(artist_msid) REFERENCES artists(artist_msid)
);
任何帮助或简单的指向正确的方向都可以。谢谢!对模式的一些修改也是可能的
这是我尝试过的,但它只给一个日期组
with user_groups as(
select date(listened_at, 'unixepoch','localtime') as dt, count(user_name) as cnt
ROW_NUMBER() OVER (ORDER BY date(listened_at, 'unixepoch','localtime'))
as row_no
FROM user_history GROUP by date(listened_at, 'unixepoch','localtime')
)
select dt as date, cnt as active_users, cnt/SUM(cnt) as percentage
from user_groups
where row_no between 1 and 6;
1条答案
按热度按时间zour9fqk1#
尝试自连接。我不会写SQL,但知道这可以通过创建6路自连接来轻松解决。自连接在SQLlite中的工作方式与在其他数据库中相同