# Values to look for in the target column.
from_ = [1, 2, 3, 4, 97, 98, 99]
# Replacement values, aligned by position: to_[i] replaces from_[i].
to_ = [0, 0, 1, 1, 2, 2, 2]
def replace(column, from_, to_):
    """
    Build a polars expression that replaces values of ``column``.

    Parameters
    ----------
    column : str
        Name of the column whose values should be replaced.
    from_ : sequence
        Values to look for in ``column``.
    to_ : sequence
        Replacement values; ``to_[i]`` replaces ``from_[i]``.

    Returns
    -------
    A ``when/then/otherwise`` expression aliased to ``column``. Values that do
    not appear in ``from_`` are kept unchanged. When ``from_`` is empty the
    column is returned as-is.

    Raises
    ------
    ValueError
        If ``from_`` and ``to_`` do not have the same length.
    """
    # `zip` would silently drop the unmatched tail, hiding a caller mistake.
    if len(from_) != len(to_):
        raise ValueError(
            f"from_ and to_ must have the same length, "
            f"got {len(from_)} and {len(to_)}"
        )

    # Nothing to replace: return the untouched column instead of failing on
    # the lookup of a first element that does not exist.
    if len(from_) == 0:
        return pl.col(column).alias(column)

    pairs = zip(from_, to_)

    # Initiate the chain with `pl.when` using the first pair only; consuming
    # it from the iterator keeps it from being added a second time below.
    first_from, first_to = next(pairs)
    branch = pl.when(pl.col(column) == first_from).then(first_to)

    # For every remaining pair add a `when.then`.
    for from_value, to_value in pairs:
        branch = branch.when(pl.col(column) == from_value).then(to_value)

    # Finish with an `otherwise` that keeps unmatched values.
    return branch.otherwise(pl.col(column)).alias(column)
# Usage example: pass the expression built by `replace` for column "a" to
# `with_column`. `df` is not defined in this snippet — presumably an existing
# polars DataFrame with a column "a"; confirm against the surrounding code.
df.with_column(replace("a", from_, to_))
def replace(column: str, mapping: dict) -> "pl.Expr":
    """
    Create a polars expression that replaces a column's values.

    Parameters
    ----------
    column : str
        Column name on which values should be replaced.
    mapping : dict
        Can be used to specify different replacement values for different
        existing values. For example, ``{'a': 'b', 'y': 'z'}`` replaces the
        value 'a' with 'b' and 'y' with 'z'. Values not mentioned in
        ``mapping`` will stay the same.

    Returns
    -------
    pl.Expr
        Expression that contains instructions to replace values in ``column``
        according to ``mapping``.

    Raises
    ------
    TypeError
        * If ``column`` is not ``str``.
        * If ``mapping`` is not ``dict``.
    ValueError
        * If ``mapping`` is empty.
    polars.exceptions.PanicException
        * When ``mapping`` has keys or values that are not mappable to arrows
          format. Only catchable via BaseException.
          See also https://pola-rs.github.io/polars-book/user-guide/datatypes.html.

    Examples
    --------
    >>> import polars as pl
    >>> df = pl.DataFrame({'fruit': ['banana', 'apple', 'apple']})
    >>> df
    shape: (3, 1)
    ┌────────┐
    │ fruit  │
    │ ---    │
    │ str    │
    ╞════════╡
    │ banana │
    ├╌╌╌╌╌╌╌╌┤
    │ apple  │
    ├╌╌╌╌╌╌╌╌┤
    │ apple  │
    └────────┘
    >>> df.with_column(replace(column='fruit', mapping={'apple': 'pomegranate'}))
    shape: (3, 1)
    ┌─────────────┐
    │ fruit       │
    │ ---         │
    │ str         │
    ╞═════════════╡
    │ banana      │
    ├╌╌╌╌╌╌╌╌╌╌╌╌╌┤
    │ pomegranate │
    ├╌╌╌╌╌╌╌╌╌╌╌╌╌┤
    │ pomegranate │
    └─────────────┘
    """
    # Type checks come first so a falsy non-dict (None, []) is reported as a
    # type problem rather than as an "empty mapping".
    if not isinstance(mapping, dict):
        raise TypeError(f"mapping must be of type dict, but is type: {type(mapping)}")
    if not isinstance(column, str):
        raise TypeError(f"column must be of type str, but is type: {type(column)}")
    if not mapping:
        # ValueError is a subclass of Exception, so existing
        # `except Exception` handlers keep working.
        raise ValueError("Mapping can't be empty")

    pairs = iter(mapping.items())

    # Seed the chain with the first pair; taking it from the iterator keeps
    # the loop below from adding the same condition a second time.
    first_from, first_to = next(pairs)
    # NOTE(review): newer polars releases may interpret a bare string passed
    # to `then` as a column name rather than a literal — confirm against the
    # installed version and wrap values in `pl.lit` if that applies.
    branch = pl.when(pl.col(column) == first_from).then(first_to)

    for from_value, to_value in pairs:
        branch = branch.when(pl.col(column) == from_value).then(to_value)

    # Unmatched values keep their original content.
    return branch.otherwise(pl.col(column)).alias(column)
4条答案
按热度按时间hgb9j2n61#
在 Polars 中,您可以构建称为
if -> then -> otherwise
表达式的列式 if else 语句(statements)
。假设我们有
DataFrame
。我们希望将这些值替换为以下值:
我们可以这样写:
一个二个一个一个
∮不要重复你自己∮
现在,这样逐条手写很快就会变得非常乏味,所以我们可以写一个函数来生成这些表达式,毕竟我们是程序员,不是吗?
因此,要替换为您建议的值,您可以执行以下操作:
其输出:
r8uurelv2#
如果您也喜欢 pandas 风格的文档字符串,并希望将其作为 utils 函数放在代码仓库(repo)中的某个位置
z2acfund3#
您也可以将
apply
与dict
一起使用,只要您为每个from_
选项指定了完整的映射。其输出:
它会比 ritchie46 的答案慢一些,但是要简单得多。
s3fp2yjn4#
不能在评论中使用代码片段,所以我将这个稍作推广的版本作为答案发布。
如果映射中缺少要映射的值,则使用默认值(如果提供了);否则将该映射视为恒等映射,即保留原值。