如何检查一个值是否与python中的类型匹配?

34gzjxbg  于 2023-10-14  发布在  Python
关注(0)|答案(7)|浏览(102)

假设我有一个python函数,它的单个参数是一个非平凡类型:

from typing import List, Dict
ArgType = List[Dict[str, int]]  # this could be any non-trivial type
def myfun(a: ArgType) -> None:
    """Placeholder for a function that expects an argument of type `ArgType`."""
    ...

然后我有一个从JSON源解析得到的数据结构:

import json
data = json.loads(...)

我的问题是:在将data用作myfun()的参数之前,我如何在运行时检查data是否有正确的类型用作myfun()的参数?

if not isCorrectType(data, ArgType):
    raise TypeError("data is not correct type")
else:
    myfun(data)
bmp9r5qi

bmp9r5qi1#

验证类型注解并非易事。Python不会自动执行此操作,而且编写自己的验证器也很困难,因为typing模块没有提供太多有用的接口。(事实上,自从python 3.5引入typing模块以来,它的内部实现已经发生了很大的变化,用起来简直是一场噩梦。)
下面是一个类型验证器函数,来自我的一个个人项目(代码墙警告):

import inspect
import typing

__all__ = ['is_instance', 'is_subtype', 'python_type', 'is_generic', 'is_base_generic', 'is_qualified_generic']

if hasattr(typing, '_GenericAlias'):
    # python 3.7
    def _is_generic(cls):
        """
        Return True if `cls` is a `typing._GenericAlias`, or a `typing._SpecialForm`
        other than `typing.Any`; return False otherwise.
        """
        if isinstance(cls, typing._GenericAlias):
            return True

        # special forms count as generic, with the exception of Any
        if isinstance(cls, typing._SpecialForm):
            return cls not in {typing.Any}

        return False

    def _is_base_generic(cls):
        """
        Return True if `cls` is a generic without concrete type arguments.

        NOTE(review): uses the private names `typing._Protocol` and
        `typing._VariadicGenericAlias` - confirm they exist on the Python version in use.
        """
        if isinstance(cls, typing._GenericAlias):
            # aliases whose origin is Generic or _Protocol are never reported as base generics
            if cls.__origin__ in {typing.Generic, typing._Protocol}:
                return False

            if isinstance(cls, typing._VariadicGenericAlias):
                return True

            # an alias is treated as a base generic while it still has type parameters
            return len(cls.__parameters__) > 0

        # among the special forms, only these three names are treated as base generics
        if isinstance(cls, typing._SpecialForm):
            return cls._name in {'ClassVar', 'Union', 'Optional'}

        return False

    def _get_base_generic(cls):
        """
        Return the generic that `cls` was built from: `cls.__origin__` if `cls._name`
        is None, otherwise the attribute of the `typing` module named `cls._name`.
        """
        # subclasses of Generic will have their _name set to None, but
        # their __origin__ will point to the base generic
        if cls._name is None:
            return cls.__origin__
        else:
            # named aliases are looked up in the typing module by their name
            return getattr(typing, cls._name)

    def _get_python_type(cls):
        """
        Like `python_type`, but only works with `typing` classes.

        Returns the `__origin__` attribute of `cls`.
        """
        return cls.__origin__

    def _get_name(cls):
        """Return the `_name` attribute of a `typing` object."""
        return cls._name
else:
    # python <3.7
    if hasattr(typing, '_Union'):
        # python 3.6
        def _is_generic(cls):
            """
            Return True if `cls` is an instance of `typing.GenericMeta`, `typing._Union`,
            `typing._Optional` or `typing._ClassVar`; return False otherwise.
            """
            if isinstance(cls, (typing.GenericMeta, typing._Union, typing._Optional, typing._ClassVar)):
                return True

            return False

        def _is_base_generic(cls):
            """Return True if `cls` is a generic without type arguments."""
            if isinstance(cls, (typing.GenericMeta, typing._Union)):
                # no type arguments recorded: __args__ is None or an empty tuple
                return cls.__args__ in {None, ()}

            # every _Optional instance is reported as a base generic
            if isinstance(cls, typing._Optional):
                return True

            return False
    else:
        # python 3.5
        def _is_generic(cls):
            """
            Return True if `cls` is an instance of one of the `typing` metaclasses
            `GenericMeta`, `UnionMeta`, `OptionalMeta`, `CallableMeta` or `TupleMeta`.
            """
            if isinstance(cls, (typing.GenericMeta, typing.UnionMeta, typing.OptionalMeta, typing.CallableMeta, typing.TupleMeta)):
                return True

            return False

        def _is_base_generic(cls):
            """
            Return True if `cls` is a generic without concrete type arguments.

            Each kind of generic stores its arguments in a different attribute, so
            every metaclass is handled separately.
            """
            if isinstance(cls, typing.GenericMeta):
                # a base generic only has TypeVars as parameters
                return all(isinstance(arg, typing.TypeVar) for arg in cls.__parameters__)

            if isinstance(cls, typing.UnionMeta):
                return cls.__union_params__ is None

            if isinstance(cls, typing.TupleMeta):
                return cls.__tuple_params__ is None

            if isinstance(cls, typing.CallableMeta):
                return cls.__args__ is None

            # every OptionalMeta instance is reported as a base generic
            if isinstance(cls, typing.OptionalMeta):
                return True

            return False

    def _get_base_generic(cls):
        """
        Return the generic that `cls` was built from.

        Raises NotImplementedError if `cls` has no `__origin__` and the name of its
        class does not end in 'Meta'.
        """
        try:
            return cls.__origin__
        except AttributeError:
            pass

        # fallback: derive the typing class from the name of cls's class by
        # stripping the 'Meta' suffix (e.g. 'UnionMeta' -> typing.Union)
        name = type(cls).__name__
        if not name.endswith('Meta'):
            raise NotImplementedError("Cannot determine base of {}".format(cls))

        name = name[:-4]
        return getattr(typing, name)

    def _get_python_type(cls):
        """
        Like `python_type`, but only works with `typing` classes.

        Raises NotImplementedError if no corresponding python class can be found.
        """
        # Many classes actually reference their corresponding abstract base class from the abc module
        # instead of their builtin variant (i.e. typing.List references MutableSequence instead of list).
        # We're interested in the builtin class (if any), so we'll traverse the MRO and look for it there.
        for typ in cls.mro():
            if typ.__module__ == 'builtins' and typ is not object:
                return typ

        # no builtin class in the MRO: fall back to the `__extra__` attribute, if present
        try:
            return cls.__extra__
        except AttributeError:
            pass

        # reduce a qualified generic to its base so that Tuple[...] is recognized below
        if is_qualified_generic(cls):
            cls = get_base_generic(cls)

        if cls is typing.Tuple:
            return tuple

        raise NotImplementedError("Cannot determine python type of {}".format(cls))

    def _get_name(cls):
        """
        Return `cls.__name__`; if `cls` has no `__name__`, return the name of its
        class with the first character removed.
        """
        try:
            return cls.__name__
        except AttributeError:
            return type(cls).__name__[1:]

if hasattr(typing.List, '__args__'):
    # python 3.6+
    def _get_subtypes(cls):
        """
        Return the type arguments (`__args__`) of a qualified generic.

        For `typing.Callable` the result is normalized to a
        `(parameter_types, return_type)` pair.
        """
        subtypes = cls.__args__

        if get_base_generic(cls) is typing.Callable:
            # Unless the arguments already are exactly `(..., return_type)`, everything
            # but the last entry is a parameter type and gets grouped into one tuple.
            if len(subtypes) != 2 or subtypes[0] is not ...:
                subtypes = (subtypes[:-1], subtypes[-1])

        return subtypes
else:
    # python 3.5
    def _get_subtypes(cls):
        """
        Return the type arguments of a generic.

        Callables yield `(cls.__args__, cls.__result__)`, or an empty tuple if
        `cls.__args__` is None. All other generics yield a list of their arguments
        with TypeVars removed.

        Raises NotImplementedError if `cls` has none of the known argument attributes.
        """
        if isinstance(cls, typing.CallableMeta):
            if cls.__args__ is None:
                return ()

            return cls.__args__, cls.__result__

        # the attribute holding the arguments depends on the kind of generic;
        # use the first one that exists
        for name in ['__parameters__', '__union_params__', '__tuple_params__']:
            try:
                subtypes = getattr(cls, name)
                break
            except AttributeError:
                pass
        else:
            raise NotImplementedError("Cannot extract subtypes from {}".format(cls))

        # drop TypeVars so that only concrete arguments remain
        subtypes = [typ for typ in subtypes if not isinstance(typ, typing.TypeVar)]
        return subtypes

def is_generic(cls):
    """
    Detects any kind of generic, for example `List` or `List[int]`. This includes "special" types like
    Union and Tuple - anything that's subscriptable, basically.

    Delegates to the `_is_generic` implementation selected for the running Python version.
    """
    return _is_generic(cls)

def is_base_generic(cls):
    """
    Detects generic base classes, for example `List` (but not `List[int]`)

    Delegates to the `_is_base_generic` implementation selected for the running Python version.
    """
    return _is_base_generic(cls)

def is_qualified_generic(cls):
    """
    Detects generics with arguments, for example `List[int]` (but not `List`)

    A type qualifies if `is_generic` accepts it and `is_base_generic` rejects it.
    """
    return is_generic(cls) and not is_base_generic(cls)

def get_base_generic(cls):
    """
    Return the base generic of a qualified generic.

    Raises TypeError if `cls` is not a qualified generic.
    """
    if not is_qualified_generic(cls):
        raise TypeError('{} is not a qualified Generic and thus has no base'.format(cls))

    return _get_base_generic(cls)

def get_subtypes(cls):
    """
    Return the type arguments of a generic.

    Delegates to the `_get_subtypes` implementation selected for the running Python version.
    """
    return _get_subtypes(cls)

def _instancecheck_iterable(iterable, type_args):
    """
    Return True if every element of `iterable` is an instance of the single
    type in `type_args`.

    Raises TypeError if `type_args` does not contain exactly one type.
    """
    if len(type_args) != 1:
        raise TypeError("Generic iterables must have exactly 1 type argument; found {}".format(type_args))

    element_type = type_args[0]
    for element in iterable:
        if not is_instance(element, element_type):
            return False
    return True

def _instancecheck_mapping(mapping, type_args):
    """
    Check the keys and values of `mapping` against the two types in `type_args`
    by handing `mapping.items()` to `_instancecheck_itemsview`.
    """
    return _instancecheck_itemsview(mapping.items(), type_args)

def _instancecheck_itemsview(itemsview, type_args):
    """
    Return True if every `(key, value)` pair in `itemsview` matches the
    `(key_type, value_type)` pair given in `type_args`.

    Raises TypeError if `type_args` does not contain exactly two types.
    """
    if len(type_args) != 2:
        raise TypeError("Generic mappings must have exactly 2 type arguments; found {}".format(type_args))

    expected_key, expected_value = type_args
    for item_key, item_value in itemsview:
        if not is_instance(item_key, expected_key):
            return False
        if not is_instance(item_value, expected_value):
            return False
    return True

def _instancecheck_tuple(tup, type_args):
    """
    Check the items of `tup` against the type arguments of a `Tuple[...]` annotation.

    Three shapes of `type_args` are distinguished:

    - `(X, ...)`, as produced by `Tuple[X, ...]`: a tuple of any length whose
      items all have to match `X`.
    - `((),)`, which some Python versions use for `Tuple[()]`: only the empty
      tuple matches.
    - anything else: a fixed-length tuple that is matched position by position.

    Returns True if `tup` matches, False otherwise.
    """
    # variadic form: the second argument is the Ellipsis marker, not a type
    if len(type_args) == 2 and type_args[1] is ...:
        item_type = type_args[0]
        return all(is_instance(val, item_type) for val in tup)

    # explicit "empty tuple" form
    if len(type_args) == 1 and type_args[0] == ():
        return len(tup) == 0

    if len(tup) != len(type_args):
        return False

    return all(is_instance(val, type_) for val, type_ in zip(tup, type_args))

# Maps each generic base class of the `typing` module to the function that
# validates the contents of a matching container. The table is keyed by
# attribute name and resolved with getattr() (rather than eval() on a dotted
# string) because not every name exists on every Python version.
_ORIGIN_TYPE_CHECKERS = {}
for class_name, check_func in {
    # iterables
    'Container': _instancecheck_iterable,
    'Collection': _instancecheck_iterable,
    'AbstractSet': _instancecheck_iterable,
    'MutableSet': _instancecheck_iterable,
    'Sequence': _instancecheck_iterable,
    'MutableSequence': _instancecheck_iterable,
    'ByteString': _instancecheck_iterable,
    'Deque': _instancecheck_iterable,
    'List': _instancecheck_iterable,
    'Set': _instancecheck_iterable,
    'FrozenSet': _instancecheck_iterable,
    'KeysView': _instancecheck_iterable,
    'ValuesView': _instancecheck_iterable,
    'AsyncIterable': _instancecheck_iterable,

    # mappings
    'Mapping': _instancecheck_mapping,
    'MutableMapping': _instancecheck_mapping,
    'MappingView': _instancecheck_mapping,
    'ItemsView': _instancecheck_itemsview,
    'Dict': _instancecheck_mapping,
    'DefaultDict': _instancecheck_mapping,
    'Counter': _instancecheck_mapping,
    'ChainMap': _instancecheck_mapping,

    # other
    'Tuple': _instancecheck_tuple,
}.items():
    try:
        cls = getattr(typing, class_name)
    except AttributeError:
        # this Python version does not provide the class; skip it
        continue

    _ORIGIN_TYPE_CHECKERS[cls] = check_func

def _instancecheck_callable(value, type_):
    """
    Check whether `value` is a callable whose annotations are compatible with the
    `Callable` annotation `type_`.

    Returns False if `value` is not callable, if the number of parameters differs,
    or if an existing annotation is not a subtype of the expected type. Returns
    True for an unqualified `Callable`.

    Raises ValueError if all existing annotations match but some are missing.
    """
    if not callable(value):
        return False

    # a bare Callable accepts every callable
    if is_base_generic(type_):
        return True

    param_types, ret_type = get_subtypes(type_)
    sig = inspect.signature(value)

    missing_annotations = []

    # an Ellipsis in place of the parameter list means the parameters are not checked
    if param_types is not ...:
        if len(param_types) != len(sig.parameters):
            return False

        # FIXME: add support for TypeVars

        # if any of the existing annotations don't match the type, we'll return False.
        # Then, if any annotations are missing, we'll throw an exception.
        for param, expected_type in zip(sig.parameters.values(), param_types):
            param_type = param.annotation
            if param_type is inspect.Parameter.empty:
                missing_annotations.append(param)
                continue

            if not is_subtype(param_type, expected_type):
                return False

    if sig.return_annotation is inspect.Signature.empty:
        missing_annotations.append('return')
    else:
        if not is_subtype(sig.return_annotation, ret_type):
            return False

    # only reached when no annotation mismatched
    if missing_annotations:
        raise ValueError("Missing annotations: {}".format(missing_annotations))

    return True

def _instancecheck_union(value, type_):
    """
    Return True if `value` is an instance of at least one of the type
    arguments of the union `type_`.
    """
    for candidate in get_subtypes(type_):
        if is_instance(value, candidate):
            return True
    return False

def _instancecheck_type(value, type_):
    """
    Check `value` against a `Type` / `Type[X]` annotation.

    Returns False if `value` is not a class, True for an unqualified `Type`, and
    otherwise whether `value` is a subtype of the single type argument.

    Raises TypeError if `type_` does not have exactly one type argument.
    """
    # anything that is not a class can never match
    if not isinstance(value, type):
        return False

    if is_base_generic(type_):
        return True

    type_args = get_subtypes(type_)
    if len(type_args) == 1:
        (expected_class,) = type_args
        return is_subtype(value, expected_class)

    raise TypeError("Type must have exactly 1 type argument; found {}".format(type_args))

# Instance checkers for `typing` constructs that need dedicated handling, keyed by
# the name that `_get_name` returns. Each checker is called as
# `checker(value, type_)` with the complete annotation.
_SPECIAL_INSTANCE_CHECKERS = {
    'Union': _instancecheck_union,
    'Callable': _instancecheck_callable,
    'Type': _instancecheck_type,
    # every value is an instance of Any
    'Any': lambda v, t: True,
}

def is_instance(obj, type_):
    """
    Return True if `obj` matches the class or type annotation `type_`.

    Works like `isinstance`, but also understands generics such as
    `List[Dict[str, int]]`, whose contents are checked recursively.

    Raises NotImplementedError for qualified generics that have no registered
    content checker.
    """
    # typing constructs with dedicated checkers (Union, Callable, Type, Any)
    if type_.__module__ == 'typing':
        base_generic = get_base_generic(type_) if is_qualified_generic(type_) else type_
        special_check = _SPECIAL_INSTANCE_CHECKERS.get(_get_name(base_generic))
        if special_check is not None:
            return special_check(obj, type_)

    # unqualified generic: only the container class itself matters
    if is_base_generic(type_):
        return isinstance(obj, _get_python_type(type_))

    # ordinary class
    if not is_qualified_generic(type_):
        return isinstance(obj, type_)

    # qualified generic: check the container class first, then its contents
    if not isinstance(obj, _get_python_type(type_)):
        return False

    base = get_base_generic(type_)
    try:
        content_check = _ORIGIN_TYPE_CHECKERS[base]
    except KeyError:
        raise NotImplementedError("Cannot perform isinstance check for type {}".format(type_))

    return content_check(obj, get_subtypes(type_))

def is_subtype(sub_type, super_type):
    """
    Return True if `sub_type` is a subtype of `super_type`.

    Both arguments may be classes or type annotations. The underlying python
    classes are compared with `issubclass`; if both sides are qualified generics,
    their type arguments are compared pairwise as well.
    """
    sub_is_generic = is_generic(sub_type)

    # compare the underlying python classes first
    concrete_sub = python_type(sub_type) if sub_is_generic else sub_type
    concrete_super = python_type(super_type)
    if not issubclass(concrete_sub, concrete_super):
        return False

    # a plain class has no type arguments that would need further checks
    if not sub_is_generic:
        return True

    # if `super_type` carries no type arguments, the class check was sufficient
    if not is_qualified_generic(super_type):
        return True

    # `super_type` is qualified, so an unqualified `sub_type` cannot be a subtype
    if is_base_generic(sub_type):
        return False

    # both sides are qualified generics: compare their type arguments pairwise
    for sub_arg, super_arg in zip(get_subtypes(sub_type), get_subtypes(super_type)):
        if not is_subtype(sub_arg, super_arg):
            return False
    return True

def python_type(annotation):
    """
    Given a type annotation or a class as input, returns the corresponding python class.

    Ordinary classes are returned unchanged; objects from the `typing` module are
    resolved with `_get_python_type`.

    Examples:

    ::
        >>> python_type(typing.Dict)
        <class 'dict'>
        >>> python_type(typing.List[int])
        <class 'list'>
        >>> python_type(int)
        <class 'int'>
    """
    # Only probe for the attribute instead of calling it: calling `mro()` fails
    # for `type` itself, where it is an unbound method.
    if not hasattr(annotation, 'mro'):
        # if it doesn't have an mro method, it must be a weird typing object
        return _get_python_type(annotation)

    if annotation.__module__ == 'typing':
        return _get_python_type(annotation)

    return annotation

演示:

>>> is_instance([{'x': 3}], List[Dict[str, int]])
True
>>> is_instance([{'x': 3}, {'y': 7.5}], List[Dict[str, int]])
False

(据我所知,它支持所有python版本,甚至包括使用typing模块backport的<3.5版本。)

vql8enpb

vql8enpb2#

很尴尬的是,没有内置的函数,但typeguard附带了一个方便的check_type()函数:

>>> from typeguard import check_type
>>> from typing import List
>>> check_type("foo", [1,2,"3"], List[int])
Traceback (most recent call last):
...
TypeError: type of foo[2] must be int; got str instead

type of foo[2] must be int; got str instead

更多信息请参见:https://typeguard.readthedocs.io/en/latest/api.html#typeguard.check_type

bd1hkmkf

bd1hkmkf3#

首先,尽管我认为你已经意识到了,但为了完整起见,类型库包含了类型提示的类型。IDE使用这些类型提示来检查您的代码是否合理,并且还可以作为开发人员期望的类型的文档。
要检查变量是否是某个类型,我们必须使用isinstance函数。令人惊讶的是,我们可以使用类型库函数的直接类型,例如。

from typing import List

value = []
isinstance(value, List)

然而,对于嵌套结构,如List[Dict[str, int]],我们不能直接使用它,因为你会得到一个TypeError。你要做的是:
1.检查初始值是否为列表
2.检查列表中的每一项是否为dict类型
3.检查每个dict的每个键是否都是字符串,每个值是否都是int
不幸的是,对于严格的检查,python有点麻烦。但是,请注意Python使用了duck类型:如果它像一只鸭子,行为也像一只鸭子,那么它肯定是一只鸭子。

vhmi4jdf

vhmi4jdf4#

处理这种情况的常用方法是利用这样一个事实,即如果传递给myfun的对象没有所需的功能,则会引发相应的异常(通常是TypeError或AttributeError)。因此,您可以执行以下操作:

# Call myfun() directly and rely on the exceptions it raises for unsuitable data.
try:
    myfun(data)
except (TypeError, AttributeError) as err:
    # Fallback for invalid types here.
    # `pass` is only a placeholder: an except block needs at least one statement.
    pass

你在问题中指出,如果传递的对象没有适当的结构,你会引发TypeError,但Python已经为你做了这件事。关键问题是你将如何处理这个案子。如果合适的话,还可以将try / except块移动到myfun中。在Python中输入时,你通常依赖duck typing:如果对象具有所需的功能,那么你不太关心它是什么类型,只要它服务于目的。
请看下面的例子。我们只需将数据传入函数,然后免费获得AttributeError(然后我们可以排除);无需手动类型检查:

>>> def myfun(data):
...     for x in data:
...             print(x.items())
... 
>>> data = json.loads('[[["a", 1], ["b", 2]], [["c", 3], ["d", 4]]]')
>>> myfun(data)
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "<stdin>", line 3, in myfun
AttributeError: 'list' object has no attribute 'items'

如果您担心所产生的错误的有用性,您仍然可以except然后重新引发自定义异常(甚至更改异常的消息):

try:
    myfun(data)
except (TypeError, AttributeError) as err:
    raise TypeError('Data has incorrect structure') from err

try:
    myfun(data)
except (TypeError, AttributeError) as err:
    err.args = ('Data has incorrect structure',)
    raise

当使用第三方代码时,应该始终检查文档中将引发的异常。例如,numpy.inner报告在某些情况下将引发ValueError。当使用该函数时,我们不需要自己执行任何检查,而是依赖于它会在需要时引发错误的事实。当使用第三方代码、且不清楚它在某些情况下的行为时,在我看来,硬编码一个相应的类型检查器(见下文)比使用适用于任何类型的通用解决方案更容易、也更清楚。这些情况应该是罕见的;无论如何,请留下相应的注释,让其他开发人员了解这种情况。
typing库用于类型提示,因此它不会在运行时检查类型。当然,你可以手动完成,但这是相当麻烦的:

def type_checker(data):
    """
    Return True if `data` is a list of dicts that map str keys to int values.

    An empty list, and a list of empty dicts, is accepted.
    """
    return (
        isinstance(data, list)
        # iterate over the argument itself, not over the builtin `list` type
        and all(isinstance(x, dict) for x in data)
        and all(isinstance(k, str) and isinstance(v, int) for x in data for k, v in x.items())
    )

这与适当的注解一起仍然是一个可接受的解决方案,并且在需要类似数据结构的情况下可以重用。意图很清楚,代码很容易验证。

tzcvj98z

tzcvj98z5#

你必须手动检查你的嵌套类型结构--类型提示是不强制的。
像这样的检查最好使用ABC(Abstract Base Classes,抽象基类)——这样用户就可以提供与默认dict/list支持相同访问方式的派生类:

import collections.abc 

def isCorrectType(data):
    """
    Return True if `data` is a collection of mutable mappings whose keys are all
    `str` and whose values are all `int`.

    The checks use the abstract base classes from `collections.abc`, so custom
    container classes are accepted as well.
    """
    if not isinstance(data, collections.abc.Collection):
        return False

    for entry in data:
        if not isinstance(entry, collections.abc.MutableMapping):
            return False

        for key in entry:
            if not (isinstance(key, str) and isinstance(entry[key], int)):
                return False

    return True

输出:

print ( isCorrectType( [ {"a":2} ] ))       # True
print ( isCorrectType( [ {2:2} ] ))         # False   
print ( isCorrectType( [ {"a":"a"} ] ))     # False   
print ( isCorrectType( [ {"a":2},1 ] ))     # False

多库:

相关信息:

另一种方法是遵循"Ask forgiveness not permission" - explain范式,简单地以您想要的形式使用您的数据,如果它不符合您想要的格式,则使用try:/except:。这更适合What is duck typing?-并允许(类似于ABC检查)消费者为您提供从list/dict派生的类,而它仍然可以工作。

7kjnsjlb

7kjnsjlb6#

如果你想做的只是json解析,你应该只使用pydantic
但是,我遇到了同样的问题,我想检查python对象的类型,所以我创建了一个比其他答案更简单的解决方案,至少处理嵌套列表和字典的复杂类型。
我用这个方法在https://gist.github.com/ramraj07/f537bf9f80b4133c65dd76c958d4c461上创建了一个gist
该方法的一些示例使用包括:

from typing import List, Dict, Union, Type, Optional

check_type('a', str)
check_type({'a': 1}, Dict[str, int])
check_type([{'a': [1.0]}, 'ten'], List[Union[Dict[str, List[float]], str]])
check_type(None, Optional[str])
check_type('abc', Optional[str])

下面的代码可供参考:

import typing

def check_type(obj: typing.Any, type_to_check: typing.Any, _external=True) -> None:
    """
    Raise TypeError unless `obj` matches the annotation `type_to_check`.

    Supported annotations are plain classes, `typing.Any`, `List`, `Dict`,
    `Tuple` (fixed-length as well as variadic `Tuple[X, ...]`), `Union` and
    `Optional`, nested arbitrarily. For unsubscripted `List`, `Dict` and `Tuple`
    only the container itself is checked.

    Parameters:
        obj: the value to validate.
        type_to_check: a class or a `typing` annotation.
        _external: internal flag; True for the outermost call, which is the only
            one that builds the descriptive error message.

    Raises:
        TypeError: if `obj` does not match `type_to_check`.
        NotImplementedError: if `type_to_check` is an unsupported `typing` construct.
    """
    try:
        # Any has to be handled before everything else, because it cannot be
        # passed to isinstance().
        if type_to_check is typing.Any:
            return

        # Dispatch on __origin__ instead of the private _name attribute, whose
        # value for Union/Optional differs between Python versions.
        origin = getattr(type_to_check, "__origin__", None)
        if origin is None and not hasattr(type_to_check, "_name"):
            # base-case: an ordinary class
            if not isinstance(obj, type_to_check):
                raise TypeError
            return

        # type_to_check is a typing construct
        type_name = getattr(type_to_check, "_name", None)
        type_args = getattr(type_to_check, "__args__", None) or ()
        # Unsubscripted aliases have no arguments, or only TypeVar placeholders.
        if all(isinstance(arg, typing.TypeVar) for arg in type_args):
            type_args = ()

        if origin is typing.Union:
            for type_option in type_args:
                try:
                    check_type(obj, type_option, _external=False)
                except TypeError:
                    continue
                break
            else:
                # no option matched
                raise TypeError
        elif origin is list:
            if not isinstance(obj, list):
                raise TypeError
            if type_args:
                element_type = type_args[0]
                for element in obj:
                    check_type(element, element_type, _external=False)
        elif origin is tuple:
            if not isinstance(obj, tuple):
                raise TypeError
            if len(type_args) == 2 and type_args[1] is ...:
                # Tuple[X, ...]: any length, every element must match X
                for element in obj:
                    check_type(element, type_args[0], _external=False)
            elif type_args == ((),):
                # argument form used for the empty tuple, Tuple[()]
                if obj:
                    raise TypeError
            elif type_args:
                # fixed-length tuple: one annotation per position
                if len(obj) != len(type_args):
                    raise TypeError
                for element, element_type in zip(obj, type_args):
                    check_type(element, element_type, _external=False)
        elif origin is dict:
            if not isinstance(obj, dict):
                raise TypeError
            if type_args:
                if len(type_args) != 2:
                    raise NotImplementedError(
                        "check_type can only accept Dict typing with separate annotations for key and values"
                    )
                key_type, value_type = type_args
                for key, value in obj.items():
                    check_type(key, key_type, _external=False)
                    check_type(value, value_type, _external=False)
        else:
            raise NotImplementedError(
                f"check_type method currently does not support checking typing of form '{type_name}'"
            )

    except TypeError:
        # Only the outermost call describes the mismatch; nested calls merely
        # signal the failure to their caller.
        if _external:
            raise TypeError(
                f"Object {repr(obj)} is of type {_construct_type_description(obj)} "
                f"when {type_to_check} was expected"
            )
        raise TypeError()

def _construct_type_description(obj) -> str:
    """
    Return a `typing`-style description of the type of `obj`, e.g.
    `List[Union[int,str]]` or `Dict[str, int]`.

    Lists and dicts are described recursively; every other object is described
    by the name of its class.
    """
    def get_types_in_iterable(iterable) -> str:
        # Describe the element types of `iterable`: a single description if all
        # elements agree, otherwise a Union of the distinct descriptions.
        types = {_construct_type_description(element) for element in iterable}
        if len(types) == 1:
            return types.pop()
        # sort the names so that the description does not depend on set ordering
        return f"Union[{','.join(sorted(types))}]"

    if isinstance(obj, list):
        return f"List[{get_types_in_iterable(obj)}]"
    elif isinstance(obj, dict):
        key_types = get_types_in_iterable(obj.keys())
        val_types = get_types_in_iterable(obj.values())
        return f"Dict[{key_types}, {val_types}]"
    else:
        return type(obj).__name__
8qgya5xd

8qgya5xd7#

您可以从trycast模块中使用isassignable函数,该模块正是为此类用例设计的。
isassignable(value, T)类似于Python的内置isinstance(),但还支持检查任意类型的注解对象,包括TypedDict、Union、Literal和许多其他类型。

>>> from trycast import isassignable
>>> from typing import List, Dict
>>> 
>>> isassignable([{'foo': 'bar'}], List[Dict[str, str]])
True
>>> isassignable([1, 2, 3], List[int])
True
>>> isassignable([1.2, 2.2], List[int])
False
>>> isassignable([1.2, 2.2], List[float])
True

相关问题