sqlite 如何通过包含标点符号的关键字检索文本?

9vw9lbht  于 2023-06-30  发布在  SQLite
关注(0)|答案(1)|浏览(120)

我使用SQLite和SQLAlchemy作为ORM。我想检索一些关键字的句子。
我考虑的情况下,一个关键字出现在开始/结束的句子使用table.message.startswith(keyword)table.message.endswith(keyword)(或table.message.ilike(f' {keyword} '))。但当关键字以标点符号开头或结尾时:

"This is first example. This will continue..."

我无法使用examplecontinue作为关键字检索任何内容。我尝试了table.message.ilike(f'%{keyword}%'),但检索到的句子不包含关键字。

9nvpjoqh

9nvpjoqh1#

我不知道这会有多慢,但我认为它可以改进。

from sqlalchemy import (
    create_engine,
    Integer,
    String,
)
from sqlalchemy.schema import (
    Column,
)
from sqlalchemy.sql import select, func, or_
from sqlalchemy.orm import declarative_base
from sqlalchemy.orm import Session

Base = declarative_base()

engine = create_engine("sqlite://", echo=True)

class Result(Base):
    __tablename__ = 'results'
    id = Column(Integer, primary_key=True, index=True)
    text = Column(String)


Base.metadata.create_all(engine)

should_match = ["This is first example. This will continue...", "Let us continue the example", "Continue reading example!!"]
should_not_match = ["To be continued but not matched", "Not a match", "Not matching again...."]
with Session(engine) as session:
    for text in should_match:
        session.add(Result(text=text))
    for text in should_not_match:
        session.add(Result(text=text))
    session.commit()

with Session(engine) as session:
    keywords = ["continue", "example"]
    and_args = []
    for keyword in keywords:
        and_args.append(Result.text.ilike(f'%{keyword}%'))
    stmt = select(Result.text).where(*and_args).subquery()
    to_replace = [".", ",", "?", ",", "!"]
    cond = stmt.c.text
    for sub in to_replace:
        cond = func.replace(cond, sub, " ")
    and_args = []
    for keyword in keywords:
        start_cond = cond.ilike(keyword+" %")
        middle_cond = cond.ilike("% "+keyword+" %")
        end_cond = cond.ilike("% "+keyword)
        and_args.append(or_(start_cond, middle_cond, end_cond))
    stmt2 = select(stmt.c.text).where(*and_args)
    matches = [r for r in session.execute(stmt2).scalars()]
    assert len(matches) == len(should_match), str(matches)

相关问题