# 函数实现了描述符协议

# 模仿函数的 __get__ 实现

class MyDescriptor:
    """Toy non-data descriptor that mimics how a function's ``__get__`` behaves.

    Access through the owning class yields the string ``"function"``; access
    through an instance yields the string ``"bound_method"``.
    """

    def __get__(self, instance, owner):
        # Dump the arguments the descriptor protocol handed us. This stays the
        # first statement so that no additional local shows up in the output.
        print(locals())
        return "function" if instance is None else "bound_method"
  

# Host class for the experiment: `desc` lives in the class namespace, so
# looking it up on the class or on an instance goes through
# MyDescriptor.__get__.
class MyClass:
    desc = MyDescriptor()

obj = MyClass()

print(MyClass.desc)     # function      (looked up on the class: instance is None)
print(obj.desc)         # bound_method  (looked up on an instance)

# 探究

# 测试

class A:
    """Minimal class whose single method is used to inspect attribute binding."""

    def fun(*args, **kwargs):
        """Accept any arguments and do nothing.

        No explicit ``self`` is declared, so the call succeeds both as
        ``A.fun()`` and as ``A().fun()``.
        """
        return None

def fun(*args, **kwargs):
    """Module-level no-op that accepts any positional and keyword arguments."""
    return None

# Module-level instance; it is the `df.a` / `a` object used in the REPL
# transcripts below.
a = A()

"""
>>> import des_fun as df
>>> df.a
<des_fun.A object at 0x0000027F3C7CDA48>
>>> df.A
<class 'des_fun.A'>
>>> df.A.fun
<function A.fun at 0x0000027F3C795E58>
>>> df.a.fun 
<bound method A.fun of <des_fun.A object at 0x0000027F3C7CDA48>>
"""

"""
>>> dis.dis(compile('df.a.fun()','','exec')) 
  1           0 LOAD_NAME                0 (df)
              2 LOAD_ATTR                1 (a)
              4 LOAD_METHOD              2 (fun)
              6 CALL_METHOD              0
              8 POP_TOP
             10 LOAD_CONST               0 (None)
             12 RETURN_VALUE
>>> dis.dis(compile('df.A.fun()','','exec')) 
  1           0 LOAD_NAME                0 (df)
              2 LOAD_ATTR                1 (A)
              4 LOAD_METHOD              2 (fun)
              6 CALL_METHOD              0
              8 POP_TOP
             10 LOAD_CONST               0 (None)
             12 RETURN_VALUE
>>> dis.dis(compile('df.fun()','','exec'))   
  1           0 LOAD_NAME                0 (df)
              2 LOAD_METHOD              1 (fun)
              4 CALL_METHOD              0
              6 POP_TOP
              8 LOAD_CONST               0 (None)
             10 RETURN_VALUE
>>> dis.dis(compile('df.fun','','exec'))   
  1           0 LOAD_NAME                0 (df)
              2 LOAD_ATTR                1 (fun)
              4 POP_TOP
              6 LOAD_CONST               0 (None)
              8 RETURN_VALUE
>>> dis.dis(compile('df.A.fun','','exec')) 
  1           0 LOAD_NAME                0 (df)
              2 LOAD_ATTR                1 (A)
              4 LOAD_ATTR                2 (fun)
              6 POP_TOP
              8 LOAD_CONST               0 (None)
             10 RETURN_VALUE
>>> dis.dis(compile('df.a.fun','','exec')) 
  1           0 LOAD_NAME                0 (df)
              2 LOAD_ATTR                1 (a)
              4 LOAD_ATTR                2 (fun)
              6 POP_TOP
              8 LOAD_CONST               0 (None)
             10 RETURN_VALUE
"""

"""
>>> from des_fun import *
>>> dis.dis(compile('A.fun()','','exec'))  
  1           0 LOAD_NAME                0 (A)
              2 LOAD_METHOD              1 (fun)
              4 CALL_METHOD              0
              6 POP_TOP
              8 LOAD_CONST               0 (None)
             10 RETURN_VALUE
>>> dis.dis(compile('a.fun()','','exec')) 
  1           0 LOAD_NAME                0 (a)
              2 LOAD_METHOD              1 (fun)
              4 CALL_METHOD              0
              6 POP_TOP
              8 LOAD_CONST               0 (None)
             10 RETURN_VALUE
>>> dis.dis(compile('fun()','','exec'))   
  1           0 LOAD_NAME                0 (fun)
              2 CALL_FUNCTION            0
              4 POP_TOP
              6 LOAD_CONST               0 (None)
              8 RETURN_VALUE
>>> dis.dis(compile('fun','','exec'))  
  1           0 LOAD_NAME                0 (fun)
              2 POP_TOP
              4 LOAD_CONST               0 (None)
              6 RETURN_VALUE
>>> dis.dis(compile('A.fun','','exec'))  
  1           0 LOAD_NAME                0 (A)
              2 LOAD_ATTR                1 (fun)
              4 POP_TOP
              6 LOAD_CONST               0 (None)
              8 RETURN_VALUE
>>> dis.dis(compile('a.fun','','exec'))  
  1           0 LOAD_NAME                0 (a)
              2 LOAD_ATTR                1 (fun)
              4 POP_TOP
              6 LOAD_CONST               0 (None)
              8 RETURN_VALUE
"""

"""
>>> f = A.fun
>>> dis.dis(compile('f','','exec'))   
  1           0 LOAD_NAME                0 (f)
              2 POP_TOP
              4 LOAD_CONST               0 (None)
              6 RETURN_VALUE
>>> dis.dis(compile('f()','','exec')) 
  1           0 LOAD_NAME                0 (f)
              2 CALL_FUNCTION            0
              4 POP_TOP
              6 LOAD_CONST               0 (None)
              8 RETURN_VALUE
>>> f = a.fun 
>>> dis.dis(compile('f','','exec'))   
  1           0 LOAD_NAME                0 (f)
              2 POP_TOP
              4 LOAD_CONST               0 (None)
              6 RETURN_VALUE
>>> dis.dis(compile('f()','','exec'))
  1           0 LOAD_NAME                0 (f)
              2 CALL_FUNCTION            0
              4 POP_TOP
              6 LOAD_CONST               0 (None)
              8 RETURN_VALUE
"""

# CALL_FUNCTION 和 CALL_METHOD

  • 实际上两者的选择与被调用的对象是函数还是方法无关,只取决于调用表达式的写法:根据测试,只要是形如x.fun()的属性调用-无论是module.fun()、class.fun()还是instance.fun()-都会走LOAD_METHOD+CALL_METHOD;而直接通过名字调用(fun(),或者先令f=a.fun再调用f())则都会走CALL_FUNCTION。

    在3.11中有了很大的变化,需要注意:CALL_FUNCTION和CALL_METHOD这两条指令都已被移除,调用统一由PRECALL/CALL指令完成。

  • 因此在3.7版本中,函数和方法的区别与CALL_FUNCTION和CALL_METHOD这两条指令是无关的。

# LOAD_ATTR

假设有如下字节码:

>>> dis.dis(compile('a.fun','','exec'))  
  1           0 LOAD_NAME                0 (a)
              2 LOAD_ATTR                1 (fun)
              4 POP_TOP
              6 LOAD_CONST               0 (None)
              8 RETURN_VALUE

Python\ceval.c​中寻找LOAD_ATTR​指令的实现。

        /* LOAD_ATTR: replace the object on top of the value stack with the
           result of looking up attribute names[oparg] on it. */
        TARGET(LOAD_ATTR) {
            /* Attribute name selected by this instruction's argument. */
            PyObject *name = GETITEM(names, oparg);
            /* Object whose attribute is being requested. */
            PyObject *owner = TOP();
            PyObject *res = PyObject_GetAttr(owner, name);
            Py_DECREF(owner);
            /* The result (possibly NULL) takes over the owner's stack slot. */
            SET_TOP(res);
            if (res == NULL)
                goto error;
            DISPATCH();
        }
  1. 其中TOP和SET_TOP分别是读取栈顶对象(不弹出)和用新对象覆盖栈顶,不做研究。

  2. GETITEM​​中有两个参数names、oparg,

    1. 我们可以从页面顶部获取这几个元素的定义

          int opcode;        /* Current opcode */	// 558行
          int oparg;         /* Current opcode argument, if any */	// 559行
          PyObject *names;	// 576行
      
    2. GETITEM并不是字节码指令,而是ceval.c中定义的宏(非调试构建下等价于PyTuple_GET_ITEM),用于按索引从元组中取出元素,它不会操作求值栈。其中names是代码对象的co_names元组,保存了代码中用到的所有名字;oparg是一个整数值,表示需要获取的元素的索引。GETITEM(names, oparg)的作用是从names中获取索引为oparg的名字(上面的字节码中oparg为1,取到的就是fun),并将其赋值给指针变量name。

  3. 因此真正的玄机还是在PyObject_GetAttr​​中

    1. 其源码在Objects\object.c​​中,注意到其首先获取owner​​的类对象,然后从owner​​的类对象出发依次尝试tp_getattro​​和tp_getattr​​,这两个函数的唯一区别只是tp_getattro​​接收python字符串参数而tp_getattr​​接收C字符串参数,前者更适用于动态获取属性的场景,官方也更加推荐。

      /* Generic attribute lookup entry point: dispatch to the attribute
         getter slot of v's type.  Returns NULL with an exception set on
         failure. */
      PyObject *
      PyObject_GetAttr(PyObject *v, PyObject *name)
      {
          PyTypeObject *tp = Py_TYPE(v);
      
          /* Attribute names must be Python str objects. */
          if (!PyUnicode_Check(name)) {
              PyErr_Format(PyExc_TypeError,
                           "attribute name must be string, not '%.200s'",
                           name->ob_type->tp_name);
              return NULL;
          }
          /* Preferred slot: receives the name as a Python object. */
          if (tp->tp_getattro != NULL)
              return (*tp->tp_getattro)(v, name);
          /* Fallback slot: receives the name as a C string, so convert first. */
          if (tp->tp_getattr != NULL) {
              const char *name_str = PyUnicode_AsUTF8(name);
              if (name_str == NULL)
                  return NULL;
              return (*tp->tp_getattr)(v, (char *)name_str);
          }
          /* The type provides neither slot: the attribute cannot exist. */
          PyErr_Format(PyExc_AttributeError,
                       "'%.50s' object has no attribute '%U'",
                       tp->tp_name, name);
          return NULL;
      }
      
    2. tp_getattro​​的实现有通用实现和自定义实现两种,通用实现是PyObject_GenericGetAttr​​,自定义实现则是自定义的__getattribute__​​和__getattr__​​,这些魔术方法会自动被设置到tp_getattro​​上。

  4. PyObject_GenericGetAttr​​

    1. 在不考虑描述器的情况下,其逻辑很简单,查找属性时先从实例字典开始找,找不到则到类型字典中找,其中搜索类型字典的顺序取决于MRO方法解析顺序。

    2. 如果发现要查找的属性是描述器,则读取属性时会去调用描述器的__get__方法,并传入参数(描述器、实例、实例的类型);相应地,对属性赋值和删除属性时调用的则是描述器的__set__和__delete__方法。

    3. function类实现了__get__方法,因此在获取属性时会调用其__get__方法,而这个__get__方法的实现明显是会看人下菜碟:假设函数是定义在类型字典之中(当定义在实例字典中时不会经过描述器协议,也就不会有这样的问题),当我们从实例去访问这个函数时返回的是一个method,当我们从类型去访问这个函数时返回的是一个function。

      class A:
          def fun(*args,**kwargs):
              pass
      
      def fun(*args,**kwargs):
              pass
      
      a = A()
      
      """
      >>> import des_fun as df
      >>> df.a
      <des_fun.A object at 0x0000027F3C7CDA48>
      >>> df.A
      <class 'des_fun.A'>
      >>> df.A.fun
      <function A.fun at 0x0000027F3C795E58>
      >>> df.a.fun 
      <bound method A.fun of <des_fun.A object at 0x0000027F3C7CDA48>>
      """
      
    4. 找到PyFunctionObject​​的源码-Include\funcobject.h​​

      /* In-memory layout of a Python function object.  Each field below
         backs the function attribute named in its trailing comment. */
      typedef struct {
          PyObject_HEAD
          PyObject *func_code;        /* A code object, the __code__ attribute */
          PyObject *func_globals;     /* A dictionary (other mappings won't do) */
          PyObject *func_defaults;    /* NULL or a tuple */
          PyObject *func_kwdefaults;  /* NULL or a dict */
          PyObject *func_closure;     /* NULL or a tuple of cell objects */
          PyObject *func_doc;         /* The __doc__ attribute, can be anything */
          PyObject *func_name;        /* The __name__ attribute, a string object */
          PyObject *func_dict;        /* The __dict__ attribute, a dict or NULL */
          PyObject *func_weakreflist; /* List of weak references */
          PyObject *func_module;      /* The __module__ attribute, can be anything */
          PyObject *func_annotations; /* Annotations, a dict or NULL */
          PyObject *func_qualname;    /* The qualified name */
      
          /* Invariant:
           *     func_closure contains the bindings for func_code->co_freevars, so
           *     PyTuple_Size(func_closure) == PyCode_GetNumFree(func_code)
           *     (func_closure may be NULL if PyCode_GetNumFree(func_code) == 0).
           */
      } PyFunctionObject;
      
    5. 其源码

      /* Default attribute getter: delegates to the WithDict variant with no
         explicit dict (NULL) and with AttributeError suppression off (0). */
      PyObject *
      PyObject_GenericGetAttr(PyObject *obj, PyObject *name)
      {
          return _PyObject_GenericGetAttrWithDict(obj, name, NULL, 0);
      }
      
      /* Generic attribute lookup.  Order of precedence implemented below:
           1. a data descriptor found on the type (via _PyType_Lookup),
           2. an entry in the instance dict,
           3. a non-data descriptor found on the type,
           4. a plain (non-descriptor) attribute found on the type.
         Returns a new reference, or NULL on failure.  When `suppress` is
         non-zero an AttributeError is cleared/omitted instead of reported. */
      PyObject *
      _PyObject_GenericGetAttrWithDict(PyObject *obj, PyObject *name,
                                       PyObject *dict, int suppress)
      {
          /* Make sure the logic of _PyObject_GetMethod is in sync with
             this method.
      
             When suppress=1, this function suppress AttributeError.
          */
      
          PyTypeObject *tp = Py_TYPE(obj);
          PyObject *descr = NULL;
          PyObject *res = NULL;
          descrgetfunc f;
          Py_ssize_t dictoffset;
          PyObject **dictptr;
      
          if (!PyUnicode_Check(name)){
              PyErr_Format(PyExc_TypeError,
                           "attribute name must be string, not '%.200s'",
                           name->ob_type->tp_name);
              return NULL;
          }
          Py_INCREF(name);
      
          if (tp->tp_dict == NULL) {
              if (PyType_Ready(tp) < 0)
                  goto done;
          }
      
          /* Look the attribute up in the dict of the type or of its base classes. */
          descr = _PyType_Lookup(tp, name);
      
          f = NULL;
      
          if (descr != NULL) {
              Py_INCREF(descr);
              f = descr->ob_type->tp_descr_get;
              /* Data descriptor (defines __set__) that also defines __get__:
                 call it right away.  Otherwise remember the lookup result and
                 keep going. */
              if (f != NULL && PyDescr_IsData(descr)) {
                  res = f(descr, obj, (PyObject *)obj->ob_type);
                  if (res == NULL && suppress &&
                          PyErr_ExceptionMatches(PyExc_AttributeError)) {
                      PyErr_Clear();
                  }
                  /* Lookup finished; nothing below applies. */
                  goto done;
              }
          }
          /* Reached whenever no data descriptor with __get__ was used above:
             descr may be NULL, a non-data descriptor, or a plain attribute. */
          /* PyObject_GenericGetAttr passes dict == NULL, so on that path this
             branch always runs: locate the instance dict. */
          if (dict == NULL) {
              /* Inline _PyObject_GetDictPtr */
              /* Offset of the dict pointer inside the instance struct: positive
                 counts from the start, negative from the end, 0 means there is
                 none (the author's example: int has no __dict__). */
              dictoffset = tp->tp_dictoffset;
              if (dictoffset != 0) {
                  if (dictoffset < 0) {
                      Py_ssize_t tsize;
                      size_t size;
      
                      tsize = ((PyVarObject *)obj)->ob_size;
                      if (tsize < 0)
                          tsize = -tsize;
                      size = _PyObject_VAR_SIZE(tp, tsize);
                      assert(size <= PY_SSIZE_T_MAX);
      
                      dictoffset += (Py_ssize_t)size;
                      assert(dictoffset > 0);
                      assert(dictoffset % SIZEOF_VOID_P == 0);
                  }
                  dictptr = (PyObject **) ((char *)obj + dictoffset);
                  dict = *dictptr;
              }
          }
          /* If the instance has a dict, try to fetch the attribute from it. */
          if (dict != NULL) {
              Py_INCREF(dict);
              res = PyDict_GetItem(dict, name);
              /* Found in the instance dict: return it. */
              if (res != NULL) {
                  Py_INCREF(res);
                  Py_DECREF(dict);
                  goto done;
              }
              Py_DECREF(dict);
          }
        
          /* The descriptor found earlier is a non-data descriptor: its __get__
             is invoked only now, after the instance dict missed. */
          if (f != NULL) {
              res = f(descr, obj, (PyObject *)Py_TYPE(obj));
              if (res == NULL && suppress &&
                      PyErr_ExceptionMatches(PyExc_AttributeError)) {
                  PyErr_Clear();
              }
              goto done;
          }
          /* _PyType_Lookup found an attribute that is not a descriptor: return
             it as is (ownership of the reference moves to res). */
          if (descr != NULL) {
              res = descr;
              descr = NULL;
              goto done;
          }
      
          /* PyObject_GenericGetAttr passes suppress == 0, so on that path the
             AttributeError is always raised here. */
          if (!suppress) {
              PyErr_Format(PyExc_AttributeError,
                           "'%.50s' object has no attribute '%U'",
                           tp->tp_name, name);
          }
        done:
          Py_XDECREF(descr);
          Py_DECREF(name);
          return res;
      }
      
    6. 查找顺序总结:

      1. 类型的数据描述器-实现了set
      2. 实例字典
      3. 类型的非数据描述器或其他值
    7. 采用这种查找顺序(数据描述器比实例变量优先级高,非数据描述器比实例变量优先级低)的原因:

      1. 有的描述器的优先级一定要比实例变量高-例如类型的__dict__​​

        >>> a.__dict__['__dict__']
        Traceback (most recent call last):
          File "<stdin>", line 1, in <module>
        KeyError: '__dict__'
        >>> A.__dict__['__dict__']
        <attribute '__dict__' of 'A' objects>
        >>> a.__dict__ is A.__dict__['__dict__'].__get__(a)
        True
        
      2. 非数据描述器优先级比实例变量低,因为用户大多数情况获取的是实例变量而不是类型变量

# func_descr_get

#PyFunction_Type

/* Type object for Python functions.  Relevant to these notes: tp_descr_get
   is func_descr_get while tp_descr_set is 0, i.e. functions implement only
   the "get" half of the descriptor protocol; tp_getattro is left as 0 in
   this static initializer. */
PyTypeObject PyFunction_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "function",
    sizeof(PyFunctionObject),
    0,
    (destructor)func_dealloc,                   /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_reserved */
    (reprfunc)func_repr,                        /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    0,                                          /* tp_hash */
    function_call,                              /* tp_call */
    0,                                          /* tp_str */
    0,                                          /* tp_getattro */
    0,                                          /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,    /* tp_flags */
    func_new__doc__,                            /* tp_doc */
    (traverseproc)func_traverse,                /* tp_traverse */
    0,                                          /* tp_clear */
    0,                                          /* tp_richcompare */
    offsetof(PyFunctionObject, func_weakreflist), /* tp_weaklistoffset */
    0,                                          /* tp_iter */
    0,                                          /* tp_iternext */
    0,                                          /* tp_methods */
    func_memberlist,                            /* tp_members */
    func_getsetlist,                            /* tp_getset */
    0,                                          /* tp_base */
    0,                                          /* tp_dict */
    func_descr_get,                             /* tp_descr_get */
    0,                                          /* tp_descr_set */
    offsetof(PyFunctionObject, func_dict),      /* tp_dictoffset */
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    func_new,                                   /* tp_new */
};

# func_descr_get

/* Bind a function to an object.

   func: the function being looked up.
   obj:  the instance the lookup went through; NULL or None when there is
         no instance.
   type: the owner type (not used here).

   Returns a new reference: the function itself when there is no instance,
   otherwise the result of PyMethod_New(func, obj). */
static PyObject *
func_descr_get(PyObject *func, PyObject *obj, PyObject *type)
{
    /* No instance to bind to: hand back the plain function. */
    if (obj == Py_None || obj == NULL) {
        Py_INCREF(func);
        return func;
    }
    return PyMethod_New(func, obj);
}

换成等价的Python实现则是:

class Function(object):
    """Pure-Python sketch of how function objects implement ``__get__``."""

    ...  # the rest of the real function type is omitted from this sketch

    def __get__(self, obj, objtype=None):
        """Simulate func_descr_get() in Objects/funcobject.c.

        Args:
            obj: The instance the attribute was looked up on, or ``None``
                when the lookup went through the class.
            objtype: The owner class; unused, as in the C implementation.

        Returns:
            ``self`` unchanged when ``obj`` is ``None``; otherwise a bound
            method that pairs ``self`` with ``obj``.
        """
        # Imported locally because nothing at module level provides ``types``.
        import types

        if obj is None:
            return self
        return types.MethodType(self, obj)

#PyMethod_New

/* Create a bound method object that pairs `func` with `self`.
   Returns a new reference, or NULL with an exception set when `self` is
   NULL or the allocation fails. */
PyObject *
PyMethod_New(PyObject *func, PyObject *self)
{
    PyMethodObject *im;
    if (self == NULL) {
        PyErr_BadInternalCall();
        return NULL;
    }
    /* Prefer recycling an object from free_list; the list is chained
       through the im_self field of the cached objects. */
    im = free_list;
    if (im != NULL) {
        free_list = (PyMethodObject *)(im->im_self);
        (void)PyObject_INIT(im, &PyMethod_Type);
        numfree--;
    }
    else {
        /* Nothing cached: allocate a fresh GC-managed object. */
        im = PyObject_GC_New(PyMethodObject, &PyMethod_Type);
        if (im == NULL)
            return NULL;
    }
    im->im_weakreflist = NULL;
    /* The method object holds its own references to func and self. */
    Py_INCREF(func);
    im->im_func = func;
    Py_XINCREF(self);
    im->im_self = self;
    _PyObject_GC_TRACK(im);
    return (PyObject *)im;
}

# CALL_METHOD

#define PEEK(n)           (stack_pointer[-(n)])


        /* CALL_METHOD: perform the call prepared by LOAD_METHOD.  oparg is the
           number of arguments on the stack above the two slots LOAD_METHOD
           pushed.  Both branches end up in call_function; they differ only
           in whether `self` is counted as an extra first argument. */
        TARGET(CALL_METHOD) {
            /* Designed to work in tandem with LOAD_METHOD. */
            PyObject **sp, *res, *meth;

            sp = stack_pointer;

            /* Lower of the two slots pushed by LOAD_METHOD. */
            meth = PEEK(oparg + 2);
            if (meth == NULL) {
                /* `meth` is NULL when LOAD_METHOD thinks that it's not
                   a method call.

                   Stack layout:

                       ... | NULL | callable | arg1 | ... | argN
                                                            ^- TOP()
                                               ^- (-oparg)
                                    ^- (-oparg-1)
                             ^- (-oparg-2)

                   `callable` will be POPed by call_function.
                   NULL will be POPed manually later.
                */
                res = call_function(&sp, oparg, NULL);
                stack_pointer = sp;
                (void)POP(); /* POP the NULL. */
            }
            else {
                /* This is a method call.  Stack layout:

                     ... | method | self | arg1 | ... | argN
                                                        ^- TOP()
                                           ^- (-oparg)
                                    ^- (-oparg-1)
                           ^- (-oparg-2)

                  `self` and `method` will be POPed by call_function.
                  We'll be passing `oparg + 1` to call_function, to
                  make it accept the `self` as a first argument.
                */
                res = call_function(&sp, oparg + 1, NULL);
                stack_pointer = sp;
            }

            /* The result (possibly NULL) is pushed before the error check. */
            PUSH(res);
            if (res == NULL)
                goto error;
            DISPATCH();
        }

# call_function

/* Issue #29227: Inline call_function() into _PyEval_EvalFrameDefault()
   to reduce the stack consumption. */
/* Call the callable stored `oparg + 1` slots below the top of the value
   stack with the `oparg` arguments above it.

   pp_stack: address of the caller's stack pointer; updated as entries are
             popped.
   oparg:    total number of arguments (positional plus keyword).
   kwnames:  tuple of keyword names, or NULL when there are none.

   Returns the call result, or NULL with an exception set. */
Py_LOCAL_INLINE(PyObject *) _Py_HOT_FUNCTION
call_function(PyObject ***pp_stack, Py_ssize_t oparg, PyObject *kwnames)
{
    /* Slot holding the callable, just below the arguments. */
    PyObject **pfunc = (*pp_stack) - oparg - 1;
    PyObject *func = *pfunc;
    PyObject *x, *w;
    Py_ssize_t nkwargs = (kwnames == NULL) ? 0 : PyTuple_GET_SIZE(kwnames);
    Py_ssize_t nargs = oparg - nkwargs;
    /* First argument slot (nargs + nkwargs == oparg). */
    PyObject **stack = (*pp_stack) - nargs - nkwargs;

    /* Always dispatch PyCFunction first, because these are
       presumed to be the most frequent callable object.
    */
    if (PyCFunction_Check(func)) {
        PyThreadState *tstate = PyThreadState_GET();
        C_TRACE(x, _PyCFunction_FastCallKeywords(func, stack, nargs, kwnames));
    }
    else if (Py_TYPE(func) == &PyMethodDescr_Type) {
        PyThreadState *tstate = PyThreadState_GET();
        if (nargs > 0 && tstate->use_tracing) {
            /* We need to create a temporary bound method as argument
               for profiling.

               If nargs == 0, then this cannot work because we have no
               "self". In any case, the call itself would raise
               TypeError (foo needs an argument), so we just skip
               profiling. */
            PyObject *self = stack[0];
            func = Py_TYPE(func)->tp_descr_get(func, self, (PyObject*)Py_TYPE(self));
            if (func != NULL) {
                C_TRACE(x, _PyCFunction_FastCallKeywords(func,
                                                         stack+1, nargs-1,
                                                         kwnames));
                Py_DECREF(func);
            }
            else {
                x = NULL;
            }
        }
        else {
            x = _PyMethodDescr_FastCallKeywords(func, stack, nargs, kwnames);
        }
    }
    else {
        if (PyMethod_Check(func) && PyMethod_GET_SELF(func) != NULL) {
            /* Optimize access to bound methods. Reuse the Python stack
               to pass 'self' as the first argument, replace 'func'
               with 'self'. It avoids the creation of a new temporary tuple
               for arguments (to replace func with self) when the method uses
               FASTCALL. */
            PyObject *self = PyMethod_GET_SELF(func);
            Py_INCREF(self);
            func = PyMethod_GET_FUNCTION(func);
            Py_INCREF(func);
            Py_SETREF(*pfunc, self);
            /* `self` now sits in the callable's old slot, so the argument
               window grows downward by one entry. */
            nargs++;
            stack--;
        }
        else {
            Py_INCREF(func);
        }

        /* Python functions take the dedicated fast path; any other callable
           goes through the generic fast-call helper. */
        if (PyFunction_Check(func)) {
            x = _PyFunction_FastCallKeywords(func, stack, nargs, kwnames);
        }
        else {
            x = _PyObject_FastCallKeywords(func, stack, nargs, kwnames);
        }
        Py_DECREF(func);
    }

    /* Exactly one of "a result" and "a pending exception" must hold. */
    assert((x != NULL) ^ (PyErr_Occurred() != NULL));

    /* Clear the stack of the function object. */
    while ((*pp_stack) > pfunc) {
        w = EXT_POP(*pp_stack);
        Py_DECREF(w);
    }

    return x;
}