The task is to exclude row by a group where the "word" field is NULL
Tables
CREATE TABLE `search_phrases` (
`id` int unsigned NOT NULL AUTO_INCREMENT,
`phrase` varchar(255) COLLATE utf8mb4_unicode_ci NOT NULL,
`frequency` bigint NOT NULL,
PRIMARY KEY (`id`),
KEY `search_phrases_phrase_index` (`phrase`),
KEY `search_phrases_frequency_index` (`frequency`)
) ENGINE=InnoDB AUTO_INCREMENT=1000001 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
CREATE TABLE `search_words` (
`id` int unsigned NOT NULL AUTO_INCREMENT,
`word` varchar(255) COLLATE utf8mb4_unicode_ci NOT NULL,
PRIMARY KEY (`id`),
UNIQUE KEY `search_words_word_unique` (`word`)
) ENGINE=InnoDB AUTO_INCREMENT=128287 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
CREATE TABLE `search_combinable` (
`search_phrase_id` int unsigned NOT NULL,
`search_word_id` int unsigned NOT NULL,
KEY `search_combinable_search_phrase_id_foreign` (`search_phrase_id`),
KEY `search_combinable_search_word_id_foreign` (`search_word_id`),
CONSTRAINT `search_combinable_search_phrase_id_foreign` FOREIGN KEY (`search_phrase_id`) REFERENCES `search_phrases` (`id`),
CONSTRAINT `search_combinable_search_word_id_foreign` FOREIGN KEY (`search_word_id`) REFERENCES `search_words` (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
CREATE TABLE `requested_words` (
`id` int unsigned NOT NULL AUTO_INCREMENT,
`word` varchar(255) COLLATE utf8mb4_unicode_ci NOT NULL,
`ts` timestamp NULL DEFAULT NULL,
PRIMARY KEY (`id`),
KEY `requested_words_word_index` (`word`)
) ENGINE=InnoDB AUTO_INCREMENT=436 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
/* some sample data */
INSERT INTO search_phrases VALUES (1, 'men''s shorts', 1031588), (2, 'red shorts', 456000), (3, 'green shorts', 436000);
INSERT INTO search_words VALUES (1, 'men''s'), (2, 'shorts'), (3, 'red'), (4, 'green');
INSERT INTO search_combinable VALUES (1, 1), (1, 2), (2, 3), (2, 2), (3, 4), (3, 2);
INSERT INTO requested_words VALUES (1, 'men''s', '2023-03-01 07:45:49'), (2, 'shorts', '2023-03-01 07:45:49'), (3, 'red', '2023-03-01 07:45:49');
As a result, there should be phrases that contain words from the requested words table
select sp.id, sp.phrase, sp.frequency from search_phrases as sp
inner join search_combinable as sc on sc.search_phrase_id = sp.id
inner join search_words as sw on sw.id = sc.search_word_id
inner join requested_words as rw on rw.word = sw.word
where sp.id not in (
select sp.id from search_phrases as temp_sp
inner join search_combinable as sc on sc.search_phrase_id = temp_sp.id
inner join search_words as sw on sw.id = sc.search_word_id
left join requested_words as rw on rw.word = sw.word
where rw.word is null and temp_sp.id = sp.id
)
group by sp.id
order by sp.frequency desc
The execution time is about 29 seconds. It grows in proportion to the increase in the "request_table"
| id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra |
|----|-------------|---------|------------|--------|-------------------------------------------------------------------------------------|--------------------------------------------|---------|-----------------------------|------|----------|----------------------------------------------|
| 1 | SIMPLE | rw | | index | requested_words_word_index | requested_words_word_index | 1022 | | 435 | 100 | Using index; Using temporary; Using filesort |
| 1 | SIMPLE | sw | | eq_ref | PRIMARY,search_words_word_unique | search_words_word_unique | 1022 | laravel.rw.word | 1 | 100 | Using index |
| 1 | SIMPLE | sc | | ref | search_combinable_search_phrase_id_foreign,search_combinable_search_word_id_foreign | search_combinable_search_word_id_foreign | 4 | laravel.sw.id | 17 | 100 | |
| 1 | SIMPLE | sp | | eq_ref | PRIMARY,search_phrases_phrase_index,search_phrases_frequency_index | PRIMARY | 4 | laravel.sc.search_phrase_id | 1 | 100 | |
| 1 | SIMPLE | temp_sp | | eq_ref | PRIMARY | PRIMARY | 4 | laravel.sc.search_phrase_id | 1 | 100 | Using where; Not exists; Using index |
| 1 | SIMPLE | sc | | ref | search_combinable_search_phrase_id_foreign,search_combinable_search_word_id_foreign | search_combinable_search_phrase_id_foreign | 4 | laravel.sc.search_phrase_id | 2 | 100 | |
| 1 | SIMPLE | sw | | eq_ref | PRIMARY | PRIMARY | 4 | laravel.sc.search_word_id | 1 | 100 | |
| 1 | SIMPLE | rw | | ref | requested_words_word_index | requested_words_word_index | 1022 | laravel.sw.word | 4 | 100 | Using where; Using index |
Dataset
search_phrases
| id | phrase | frequency |
| ------------ | ------------ | ------------ |
| 1 | men's shorts | 1031588 |
| 2 | red shorts | 456000 |
| 3 | green shorts | 436000 |
search_combinable
| search_phrase_id | search_word_id |
| ------------ | ------------ |
| 1 | 1 |
| 1 | 2 |
| 2 | 3 |
| 2 | 2 |
| 3 | 4 |
| 3 | 2 |
search_words
| id | word |
| ------------ | ------------ |
| 1 | men's |
| 2 | shorts |
| 3 | red |
| 4 | green |
requested_words
| id | word | ts |
| ------------ | ------------ | ------------ |
| 1 | men's | 2023-03-01 07:45:49 |
| 2 | shorts | 2023-03-01 07:45:49 |
| 3 | red | 2023-03-01 07:45:49 |
in the response, I expect to see the phrases: men's shorts and red shorts
P.S After adding the primary key to the search_combinable table
CREATE TABLE `search_combinable` (
`search_phrase_id` int unsigned NOT NULL,
`search_word_id` int unsigned NOT NULL,
PRIMARY KEY (`search_word_id`,`search_phrase_id`),
KEY `search_combinable_search_word_id_search_phrase_id_index` (`search_word_id`,`search_phrase_id`),
KEY `search_combinable_search_phrase_id_foreign` (`search_phrase_id`),
CONSTRAINT `search_combinable_search_phrase_id_foreign` FOREIGN KEY (`search_phrase_id`) REFERENCES `search_phrases` (`id`),
CONSTRAINT `search_combinable_search_word_id_foreign` FOREIGN KEY (`search_word_id`) REFERENCES `search_words` (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
select sp.id, sp.phrase, sp.frequency
from search_phrases as sp
where not exists (
select 1
from search_phrases sp2
left join search_combinable as sc on sc.search_phrase_id = sp2.id
left join search_words as sw on sw.id = sc.search_word_id
left join requested_words as rw on rw.word = sw.word
where sp2.id = sp.id
and rw.word is null
)
order by sp.frequency desc;
id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra |
---|---|---|---|---|---|---|---|---|---|---|---|
1 | SIMPLE | sw | index | PRIMARY | search_words_word_unique | 1022 | 43359 | 100 | Using index; Using temporary; Using filesort | ||
1 | SIMPLE | rw | ref | requested_words_word_index | requested_words_word_index | 1022 | laravel.sw.word | 4 | 100 | Using index | |
1 | SIMPLE | sc | ref | PRIMARY,search_combinable_search_word_id_search_phrase_id_index,search_combinable_search_phrase_id_foreign | search_combinable_search_word_id_search_phrase_id_index | 4 | laravel.sw.id | 18 | 100 | Using index | |
1 | SIMPLE | sp | eq_ref | PRIMARY,search_phrases_phrase_index,search_phrases_frequency_index | PRIMARY | 4 | laravel.sc.search_phrase_id | 1 | 100 |
2条答案
按热度按时间mfuanj7w1#
您的联接表
search_combinable
没有PK,索引不正确。它应该是:可能PK和反向应该是相反的。你可以用你的数据集和已知的查询进行实验。
到目前为止我想出的最好的是:
对于您的测试数据集,它在0.93 - 0.97秒内返回47行。
我设法挤出一点通过预先加入
search_words
和requested_words
,但YMMV:0.68- 0.70秒
wnavrhmk2#
删除所有冗余,所有这些表都包含在查询中-您也不需要在where子句中使用另一组连接等: