December 9, 2024

CMU15445作业

学一学CMU的15445,课程地址

Homework #1 - SQL

要求在 SQLiteDuckdb 上编写一些查询代码并执行,还是有些难度的

  1. 查询至少得过一枚奖牌的教练(即相同国家和项目的运动员或队伍得过至少一枚奖牌的教练),并按以奖牌数为第一关键字降序、以名字为第二关键字字典序排序

    分析:

    首先我们要找出教练的国家和项目

    1
    select ch.name, ch.country_code, ch.discipline from coaches as ch;

    然后要找出奖牌的项目和胜者 code

    1
    select m.discipline, m.winner_code from medals as m;

    将两者通过 discipline 结合

    1
    select ch.name as ch_name, ch.country_code as ch_ct, m.winner_code as win from coaches as ch, medals as m where ch.discipline = m.discipline;

    先找运动员

    1
    2
    3
    4
    5
    select tmp.ch_name, count(*) as c
    from (select ch.name as ch_name, ch.country_code as ch_ct, m.winner_code as win from coaches as ch, medals as m where ch.discipline = m.discipline) as tmp, athletes as a
    where tmp.win = a.code and tmp.ch_ct = a.country_code
    group by tmp.ch_name
    having c > 0;

    再找队伍

    这个地方需要注意,队伍 relation 中一个 tuple 表示一个队员,所以一个队伍可能会有很多 tuple,我们需要的只是 code 和 country_code,所以需要子查询

    1
    select code, country_code from teams group by code;

    一个很有意思的事情,在 SQLite 中,上面的语句是可以正常执行的,但是在 Duckdb 中会报错,因为机器不知道 codecountry_code 之间存在关系,即不同的 code 对应不同的 country_code,所以若只以 code 分组会导致机器不知道要拿组内哪个 country_code 的值来输出,产生不准确的查询,要么也以 country_code 分组,要么加 any_value() 函数,表示随意取一个值,但是在 SQLite 中,没有这个函数

    1
    select code, any_value(country_code) from teams group by code;

    1
    2
    3
    4
    5
    select tmp.ch_name, count(*) as c
    from (select ch.name as ch_name, ch.country_code as ch_ct, m.winner_code as win from coaches as ch, medals as m where ch.discipline = m.discipline) as tmp, (select code, country_code as ct from teams group by code) as t
    where tmp.win = t.code and tmp.ch_ct = t.ct
    group by tmp.ch_name
    having c > 0;

    最后排序(丑的一批)

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    -- SQLite
    select q1.name, sum(c) as s
    from
    (
    select tmp.ch_name as name, count(*) as c
    from (select ch.name as ch_name, ch.country_code as ch_ct, m.winner_code as win from coaches as ch, medals as m where ch.discipline = m.discipline) as tmp, athletes as a
    where tmp.win = a.code and tmp.ch_ct = a.country_code
    group by tmp.ch_name
    having c > 0
    union all
    select tmp.ch_name as name, count(*) as c
    from (select ch.name as ch_name, ch.country_code as ch_ct, m.winner_code as win from coaches as ch, medals as m where ch.discipline = m.discipline) as tmp, (select code, country_code as ct from teams group by code) as t
    where tmp.win = t.code and tmp.ch_ct = t.ct
    group by tmp.ch_name
    having c > 0
    ) as q1
    group by q1.name
    order by s desc, q1.name;

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    -- Duckdb
    select q1.name, sum(c) as s
    from
    (
    select tmp.ch_name as name, count(*) as c
    from (select ch.name as ch_name, ch.country_code as ch_ct, m.winner_code as win from coaches as ch, medals as m where ch.discipline = m.discipline) as tmp, athletes as a
    where tmp.win = a.code and tmp.ch_ct = a.country_code
    group by tmp.ch_name
    having c > 0
    union all
    select tmp.ch_name as name, count(*) as c
    from (select ch.name as ch_name, ch.country_code as ch_ct, m.winner_code as win from coaches as ch, medals as m where ch.discipline = m.discipline) as tmp, (select code, any_value(country_code) as ct from teams group by code) as t
    where tmp.win = t.code and tmp.ch_ct = t.ct
    group by tmp.ch_name
    having c > 0
    ) as q1
    group by q1.name
    order by s desc, q1.name;

  2. 查询所有柔道选手的姓名和奖牌数,并按以奖牌数为第一关键字降序、以名字为第二关键字字典序排序

    分析:

    首先查询所有柔道选手

    1
    select name from athletes where disciplines = "['Judo']";

    还有一个很有意思的事情就是 Duckdb 不支持双引号,所以你需要这样写

    1
    select name from athletes where disciplines = '[''Judo'']';

    然后选出所有的柔道奖牌

    1
    select winner_code from medals where discipline = 'Judo';

    选手可能自己拿牌子,也可能通过队伍拿牌子,但是牌子数量是一定的

    先算个人拿的牌子

    1
    2
    3
    4
    select a.name, count(*)
    from athletes as a, medals as m
    where a.disciplines = "['Judo']" and m.discipline = 'Judo' and m.winner_code = a.code
    group by a.name;

    再算在队伍中拿的牌子

    1
    2
    3
    select a.name as name, t.code as t_code
    from athletes as a, teams as t
    where a.disciplines = "['Judo']" and a.code = t.athletes_code;

    1
    2
    3
    4
    5
    6
    7
    8
    select p.name, count(*)
    from (
    select a.name as name, t.code as t_code
    from athletes as a, teams as t
    where a.disciplines = "['Judo']" and a.code = t.athletes_code
    ) as p, medals as m
    where p.t_code = m.winner_code
    group by p.name;

    再合并排序,注意最后要加上奖牌数为 的选手

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    -- SQLite
    select q2.name, sum(c) as s
    from
    (
    select a.name as name, count(*) as c
    from athletes as a, medals as m
    where a.disciplines = "['Judo']" and m.discipline = 'Judo' and m.winner_code = a.code
    group by a.name
    union all
    select p.name as name, count(*) as c
    from (
    select a.name as name, t.code as t_code
    from athletes as a, teams as t
    where a.disciplines = "['Judo']" and a.code = t.athletes_code
    ) as p, medals as m
    where p.t_code = m.winner_code
    group by p.name
    union all
    select name,0 from athletes where disciplines = "['Judo']"
    ) as q2
    group by q2.name
    order by s desc, q2.name;

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    -- Duckdb
    select q2.name, sum(c) as s
    from
    (
    select a.name as name, count(*) as c
    from athletes as a, medals as m
    where a.disciplines = '[''Judo'']' and m.discipline = 'Judo' and m.winner_code = a.code
    group by a.name
    union all
    select p.name as name, count(*) as c
    from (
    select a.name as name, t.code as t_code
    from athletes as a, teams as t
    where a.disciplines = '[''Judo'']' and a.code = t.athletes_code
    ) as p, medals as m
    where p.t_code = m.winner_code
    group by p.name
    union all
    select name,0 from athletes where disciplines = '[''Judo'']'
    ) as q2
    group by q2.name
    order by s desc, q2.name;

  3. 对于所有举办过田径项目的地点,列出所有在该地点比过赛的运动员,并按以其国籍所在国与代表国之间的距离为第一关键字降序、以名字为第二关键字字典序排序

    分析:

    先查所有举办过田径项目的地点所对应的比赛结果

    1
    select r.participant_code from results as r, venues as v where v.disciplines like '%Athletics%' and r.venue = v.venue;

    再查人(还是分两部分,一部分个人,一部分队伍)

    1
    2
    3
    4
    5
    6
    7
    8
    9
    select a.name as ATHLETE_NAME, any_value(a.country_code) as REPRESENTED_COUNTRY_CODE, any_value(a.nationality_code) as NATIONALITY_COUNTRY_CODE
    from
    (
    select r.participant_code as pc
    from results as r, venues as v
    where v.disciplines like '%Athletics%' and r.venue = v.venue
    ) as rv, athletes as a
    where rv.pc = a.code
    group by a.name;

    1
    2
    3
    4
    5
    6
    7
    8
    9
    select a.name as ATHLETE_NAME, any_value(a.country_code) as REPRESENTED_COUNTRY_CODE, any_value(a.nationality_code) as NATIONALITY_COUNTRY_CODE
    from
    (
    select r.participant_code as pc
    from results as r, venues as v
    where v.disciplines like '%Athletics%' and r.venue = v.venue
    ) as rv, teams as t, athletes as a
    where rv.pc = t.code and t.athletes_code = a.code
    group by a.name;

    再排序,有细节,注意只输出经纬度都不为空的数据,正确版本如下

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    -- Duckdb
    with res as
    (
    select a.code, any_value(a.name) as ATHLETE_NAME, any_value(a.country_code) as REPRESENTED_COUNTRY_CODE, any_value(a.nationality_code) as NATIONALITY_COUNTRY_CODE
    from
    (
    select r.participant_code as pc
    from results as r, venues as v
    where v.disciplines like '%Athletics%' and r.venue = v.venue
    ) as rv, athletes as a
    where rv.pc = a.code
    group by a.code
    union
    select a.code, any_value(a.name) as ATHLETE_NAME, any_value(a.country_code) as REPRESENTED_COUNTRY_CODE, any_value(a.nationality_code) as NATIONALITY_COUNTRY_CODE
    from
    (
    select r.participant_code as pc
    from results as r, venues as v
    where v.disciplines like '%Athletics%' and r.venue = v.venue
    ) as rv, teams as t, athletes as a
    where rv.pc = t.code and t.athletes_code = a.code
    group by a.code
    )
    select res.ATHLETE_NAME, res.REPRESENTED_COUNTRY_CODE, res.NATIONALITY_COUNTRY_CODE
    from res, countries as c1, countries as c2
    where
    (
    res.REPRESENTED_COUNTRY_CODE = c1.code and
    res.NATIONALITY_COUNTRY_CODE = c2.code and
    c1.Latitude is not null and
    c2.Latitude is not null and
    c1.Longitude is not null and
    c2.Longitude is not null
    )
    order by ((c1.Latitude-c2.Latitude)*(c1.Latitude-c2.Latitude)+(c1.Longitude-c2.Longitude)*(c1.Longitude-c2.Longitude)) desc, res.ATHLETE_NAME;

    在之前我犯了一个错误出现了如下的结果

    Std:

    image-20250111075602898

    My:

    image-20250111075649255

    怎么活?

    排查结果是,有一个重名的选手,也就是名字一样但编号不一样,union 的时候我没输出编号,只输出了名字,所以 union 语句自己给去重了,警钟长鸣

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    -- SQLite
    with res as
    (
    select a.code, a.name as ATHLETE_NAME, a.country_code as REPRESENTED_COUNTRY_CODE, a.nationality_code as NATIONALITY_COUNTRY_CODE
    from
    (
    select r.participant_code as pc
    from results as r, venues as v
    where v.disciplines like '%Athletics%' and r.venue = v.venue
    ) as rv, athletes as a
    where rv.pc = a.code
    group by a.code
    union
    select a.code, a.name as ATHLETE_NAME, a.country_code as REPRESENTED_COUNTRY_CODE, a.nationality_code as NATIONALITY_COUNTRY_CODE
    from
    (
    select r.participant_code as pc
    from results as r, venues as v
    where v.disciplines like '%Athletics%' and r.venue = v.venue
    ) as rv, teams as t, athletes as a
    where rv.pc = t.code and t.athletes_code = a.code
    group by a.code
    )
    select res.ATHLETE_NAME, res.REPRESENTED_COUNTRY_CODE, res.NATIONALITY_COUNTRY_CODE
    from res, countries as c1, countries as c2
    where
    (
    res.REPRESENTED_COUNTRY_CODE = c1.code and
    res.NATIONALITY_COUNTRY_CODE = c2.code and
    c1.Latitude is not null and
    c2.Latitude is not null and
    c1.Longitude is not null and
    c2.Longitude is not null
    )
    order by ((c1.Latitude-c2.Latitude)*(c1.Latitude-c2.Latitude)+(c1.Longitude-c2.Longitude)*(c1.Longitude-c2.Longitude)) desc, res.ATHLETE_NAME;

  4. 对于每一天,找到当天在前五名(包括)中出现次数最多的国家,同时列出其人口排名和 GDP 排名,并按日期升序排序

    分析:

    先查每一天各个国家在前五名的记录,注意这里是 union all

    1
    2
    3
    4
    5
    6
    7
    	select r.date as date, a.country_code as country_code, r.rank
    from results as r, athletes as a
    where r.participant_code = a.code and r.rank <= 5 and r.rank is not null
    union all
    select r.date as date, t.country_code as country_code, r.rank
    from results as r, (select teams.code as code, any_value(teams.country_code) as country_code from teams group by teams.code) as t
    where r.participant_code = t.code and r.rank <= 5 and r.rank is not null;

    再通过横向查询算出每天各个国家在前五名中出现的次数

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    with tms as 
    (
    select r.date as date, a.country_code as country_code, r.rank
    from results as r, athletes as a
    where r.participant_code = a.code and r.rank <= 5 and r.rank is not null
    union all
    select r.date as date, t.country_code as country_code, r.rank
    from results as r, (select teams.code as code, any_value(teams.country_code) as country_code from teams group by teams.code) as t
    where r.participant_code = t.code and r.rank <= 5 and r.rank is not null
    )
    select distinct t1.date, t1.country_code, times.c
    from tms as t1,
    lateral
    (
    select count(*) as c
    from tms as t2
    where t2.date = t1.date and t2.country_code = t1.country_code
    ) as times
    order by t1.date asc;

    利用窗口函数得到每天次数最大的一个 tuple

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    with max_t as
    (
    select
    *,
    row_number() over (partition by mdt.date order by mdt.c desc, mdt.country_code) as rk
    from
    (
    with tms as
    (
    select r.date as date, a.country_code as country_code, r.rank
    from results as r, athletes as a
    where r.participant_code = a.code and r.rank <= 5 and r.rank is not null
    union all
    select r.date as date, t.country_code as country_code, r.rank
    from results as r, (select teams.code as code, any_value(teams.country_code) as country_code from teams group by teams.code) as t
    where r.participant_code = t.code and r.rank <= 5 and r.rank is not null
    )
    select distinct t1.date as date, t1.country_code as country_code, times.c as c
    from tms as t1,
    lateral
    (
    select count(*) as c
    from tms as t2
    where t2.date = t1.date and t2.country_code = t1.country_code
    ) as times
    ) as mdt
    )
    select m.date, m.country_code, m.c
    from max_t as m
    where rk = 1
    order by m.date asc;

    查询 GDP_rank 和 populartion_rank

    1
    2
    3
    4
    5
    6
    7
    8
    select 
    code,
    rank() over (order by Population desc) as rk_pop,
    rank() over (order by "GDP ($ per capita)" desc) as rk_GDP
    from countries
    where
    "GDP ($ per capita)" is not null and
    Population is not null;

    合并!排序!

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
    42
    43
    -- Duckdb
    with max_t as
    (
    select
    *,
    row_number() over (partition by mdt.date order by mdt.c desc, mdt.country_code) as rk
    from
    (
    with tms as
    (
    select r.date as date, a.country_code as country_code, r.rank
    from results as r, athletes as a
    where r.participant_code = a.code and r.rank <= 5 and r.rank is not null
    union all
    select r.date as date, t.country_code as country_code, r.rank
    from results as r, (select teams.code as code, any_value(teams.country_code) as country_code from teams group by teams.code) as t
    where r.participant_code = t.code and r.rank <= 5 and r.rank is not null
    )
    select distinct t1.date as date, t1.country_code as country_code, times.c as c
    from tms as t1,
    lateral
    (
    select count(*) as c
    from tms as t2
    where t2.date = t1.date and t2.country_code = t1.country_code
    ) as times
    ) as mdt
    )
    select m.date as date, m.country_code as country_code, m.c as top5_appearances, ct_info.rk_GDP as gdp_rank, ct_info.rk_pop as population_rank
    from
    max_t as m,
    (
    select
    code,
    rank() over (order by Population desc) as rk_pop,
    rank() over (order by "GDP ($ per capita)" desc) as rk_GDP
    from countries
    where
    "GDP ($ per capita)" is not null and
    Population is not null
    ) as ct_info
    where rk = 1 and ct_info.code = m.country_code
    order by m.date asc;

    SQLite 不支持横向查询 lateral 关键字

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
    42
    43
    44
    -- SQLite
    with max_t as
    (
    select
    *,
    row_number() over (partition by mdt.date order by mdt.c desc, mdt.country_code) as rk
    from
    (
    with tms as
    (
    select r.date as date, a.country_code as country_code, r.rank
    from results as r, athletes as a
    where r.participant_code = a.code and r.rank <= 5 and r.rank is not null
    union all
    select r.date as date, t.country_code as country_code, r.rank
    from results as r, (select teams.code as code, teams.country_code as country_code from teams group by teams.code) as t
    where r.participant_code = t.code and r.rank <= 5 and r.rank is not null
    )
    select distinct t1.date as date, t1.country_code as country_code, times.c as c
    from tms as t1
    left join
    (
    select t2.date, t2.country_code, count(*) as c
    from tms as t2
    group by t2.date, t2.country_code
    ) as times
    on t1.date = times.date and t1.country_code = times.country_code
    ) as mdt
    )
    select m.date as date, m.country_code as country_code, m.c as top5_appearances, ct_info.rk_GDP as gdp_rank, ct_info.rk_pop as population_rank
    from
    max_t as m,
    (
    select
    code,
    rank() over (order by Population desc) as rk_pop,
    rank() over (order by "GDP ($ per capita)" desc) as rk_GDP
    from countries
    where
    "GDP ($ per capita)" is not null and
    Population is not null
    ) as ct_info
    where rk = 1 and ct_info.code = m.country_code
    order by m.date asc;

  5. 对比于东京奥运会,列出在金牌数量上有最大进步的五个国家,对于这五个国家,列出所属所有全女生的队伍,并按以提升的金牌数为第一关键字降序、以国家 code 为第二关键字字典序、以队伍 code 为第三关键字字典序排序

    分析:

    先查出所有国家的金牌数量

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    select s1.cc, sum(s)
    from
    (
    select c.code as cc, count(*) as s
    from medals as m, countries as c, athletes as a, medal_info as mi
    where
    m.winner_code = a.code and
    a.country_code = c.code and
    mi.code = m.medal_code and
    mi.name = 'Gold Medal'
    group by c.code
    union all
    select c.code as cc, count(*) as s
    from medals as m, countries as c, (select code, any_value(country_code) as country_code from teams group by code) as t, medal_info as mi
    where
    m.winner_code = t.code and
    t.country_code = c.code and
    mi.code = m.medal_code and
    mi.name = 'Gold Medal'
    group by c.code
    ) as s1
    group by cc;

    再查出所有国家进步的金牌数量,以及前五

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    select 
    s1.cc,
    (sum(s)-any_value(tm.gold_medal)) as increased_gold_medal_number,
    rank() over (order by increased_gold_medal_number desc) as rk
    from
    (
    select c.code as cc, count(*) as s
    from medals as m, countries as c, athletes as a, medal_info as mi
    where
    m.winner_code = a.code and
    a.country_code = c.code and
    mi.code = m.medal_code and
    mi.name = 'Gold Medal'
    group by c.code
    union all
    select c.code as cc, count(*) as s
    from medals as m, countries as c, (select code, any_value(country_code) as country_code from teams group by code) as t, medal_info as mi
    where
    m.winner_code = t.code and
    t.country_code = c.code and
    mi.code = m.medal_code and
    mi.name = 'Gold Medal'
    group by c.code
    ) as s1 left join tokyo_medals as tm
    on s1.cc = tm.country_code
    where tm.gold_medal is not null
    group by cc
    order by rk
    limit 5;

    找全女队

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    with sm1 as
    (
    select t.code as code, count(*) as c, any_value(t.country_code) as cc
    from teams as t, athletes as a
    where t.athletes_code = a.code
    group by t.code
    ),
    sm2 as
    (
    select t.code as code, count(*) as c, any_value(t.country_code) as cc
    from teams as t, athletes as a, gender as g
    where
    t.athletes_code = a.code and
    a.gender = g.id and
    g.name = 'Female'
    group by t.code
    )
    select sm1.cc, sm1.code
    from sm1, sm2
    where sm1.code = sm2.code and sm1.c = sm2.c;

    合并!排序!

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
    42
    43
    44
    45
    46
    47
    48
    49
    50
    51
    52
    53
    54
    -- Duckdb
    with tmp as
    (
    select
    s1.cc,
    (sum(s)-any_value(tm.gold_medal)) as increased_gold_medal_number,
    rank() over (order by increased_gold_medal_number desc) as rk
    from
    (
    select c.code as cc, count(*) as s
    from medals as m, countries as c, athletes as a, medal_info as mi
    where
    m.winner_code = a.code and
    a.country_code = c.code and
    mi.code = m.medal_code and
    mi.name = 'Gold Medal'
    group by c.code
    union all
    select c.code as cc, count(*) as s
    from medals as m, countries as c, (select code, any_value(country_code) as country_code from teams group by code) as t, medal_info as mi
    where
    m.winner_code = t.code and
    t.country_code = c.code and
    mi.code = m.medal_code and
    mi.name = 'Gold Medal'
    group by c.code
    ) as s1 left join tokyo_medals as tm
    on s1.cc = tm.country_code
    where tm.gold_medal is not null
    group by cc
    order by rk
    limit 5
    ),
    sm1 as
    (
    select t.code as code, count(*) as c, any_value(t.country_code) as cc
    from teams as t, athletes as a
    where t.athletes_code = a.code
    group by t.code
    ),
    sm2 as
    (
    select t.code as code, count(*) as c, any_value(t.country_code) as cc
    from teams as t, athletes as a, gender as g
    where
    t.athletes_code = a.code and
    a.gender = g.id and
    g.name = 'Female'
    group by t.code
    )
    select tmp.cc as country_code, tmp.increased_gold_medal_number as increased_gold_medal_number, sm1.code as team_code
    from sm1, sm2, tmp
    where tmp.cc = sm1.cc and sm1.code = sm2.code and sm1.c = sm2.c
    order by increased_gold_medal_number desc, country_code, team_code;

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
    42
    43
    44
    45
    46
    47
    48
    49
    50
    51
    52
    53
    54
    -- SQLite
    with tmp as
    (
    select
    s1.cc,
    (sum(s)-tm.gold_medal) as increased_gold_medal_number,
    rank() over (order by (sum(s)-tm.gold_medal) desc) as rk
    from
    (
    select c.code as cc, count(*) as s
    from medals as m, countries as c, athletes as a, medal_info as mi
    where
    m.winner_code = a.code and
    a.country_code = c.code and
    mi.code = m.medal_code and
    mi.name = 'Gold Medal'
    group by c.code
    union all
    select c.code as cc, count(*) as s
    from medals as m, countries as c, (select code, country_code as country_code from teams group by code) as t, medal_info as mi
    where
    m.winner_code = t.code and
    t.country_code = c.code and
    mi.code = m.medal_code and
    mi.name = 'Gold Medal'
    group by c.code
    ) as s1 left join tokyo_medals as tm
    on s1.cc = tm.country_code
    where tm.gold_medal is not null
    group by cc
    order by rk
    limit 5
    ),
    sm1 as
    (
    select t.code as code, count(*) as c, t.country_code as cc
    from teams as t, athletes as a
    where t.athletes_code = a.code
    group by t.code
    ),
    sm2 as
    (
    select t.code as code, count(*) as c, t.country_code as cc
    from teams as t, athletes as a, gender as g
    where
    t.athletes_code = a.code and
    a.gender = g.id and
    g.name = 'Female'
    group by t.code
    )
    select tmp.cc as country_code, tmp.increased_gold_medal_number as increased_gold_medal_number, sm1.code as team_code
    from sm1, sm2, tmp
    where tmp.cc = sm1.cc and sm1.code = sm2.code and sm1.c = sm2.c
    order by increased_gold_medal_number desc, country_code, team_code;

关于本文

由 wsy_jim 撰写, 采用 CC BY-NC 4.0 许可协议.

#CMU#Databases