From 2f3edf1d7a4d22b50f3173b678e6fdd247626ee6 Mon Sep 17 00:00:00 2001
From: booksword <qianyun210603@hotmail.com>
Date: Sun, 24 Jul 2022 16:01:11 +0800
Subject: [PATCH 1/8] bug fix: 1) 100 should be used to scale down
 percentileofscore return to 0-1, not length of array; 2) for (linear)
 weighted MA(n), weight should be n, n-1, ..., 1 instead of n-1, ..., 0

---
 qlib/data/ops.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/qlib/data/ops.py b/qlib/data/ops.py
index 1cbb1d2e628..2fe6acbd6d1 100644
--- a/qlib/data/ops.py
+++ b/qlib/data/ops.py
@@ -1165,7 +1165,7 @@ def rank(x):
             x1 = x[~np.isnan(x)]
             if x1.shape[0] == 0:
                 return np.nan
-            return percentileofscore(x1, x1[-1]) / len(x1)
+            return percentileofscore(x1, x1[-1]) / 100
 
         if self.N == 0:
             series = series.expanding(min_periods=1).apply(rank, raw=True)
@@ -1341,7 +1341,7 @@ def _load_internal(self, instrument, start_index, end_index, *args):
         # TODO: implement in Cython
 
         def weighted_mean(x):
-            w = np.arange(len(x))
+            w = np.arange(len(x)) + 1
             w = w / w.sum()
             return np.nanmean(w * x)
 

From 9e88ac39c46e295a909d348226cd62adb87517b3 Mon Sep 17 00:00:00 2001
From: booksword <qianyun210603@hotmail.com>
Date: Sat, 13 Aug 2022 09:42:02 +0800
Subject: [PATCH 2/8] use native pandas function for rank

---
 qlib/data/ops.py | 21 +++------------------
 1 file changed, 3 insertions(+), 18 deletions(-)

diff --git a/qlib/data/ops.py b/qlib/data/ops.py
index 2fe6acbd6d1..63aad7b3b25 100644
--- a/qlib/data/ops.py
+++ b/qlib/data/ops.py
@@ -34,8 +34,6 @@
 
 
 #################### Element-Wise Operator ####################
-
-
 class ElemOperator(ExpressionOps):
     """Element-wise Operator
 
@@ -216,9 +214,7 @@ class Not(NpElemOperator):
 
     Parameters
     ----------
-    feature_left : Expression
-        feature instance
-    feature_right : Expression
+    feature : Expression
         feature instance
 
     Returns
@@ -241,8 +237,6 @@ class PairOperator(ExpressionOps):
         feature instance or numeric value
     feature_right : Expression
         feature instance or numeric value
-    func : str
-        operator function
 
     Returns
     ----------
@@ -1157,20 +1151,11 @@ def __init__(self, feature, N):
 
     def _load_internal(self, instrument, start_index, end_index, *args):
         series = self.feature.load(instrument, start_index, end_index, *args)
-        # TODO: implement in Cython
-
-        def rank(x):
-            if np.isnan(x[-1]):
-                return np.nan
-            x1 = x[~np.isnan(x)]
-            if x1.shape[0] == 0:
-                return np.nan
-            return percentileofscore(x1, x1[-1]) / 100
 
         if self.N == 0:
-            series = series.expanding(min_periods=1).apply(rank, raw=True)
+            series = series.expanding(min_periods=1).rank(pct=True)
         else:
-            series = series.rolling(self.N, min_periods=1).apply(rank, raw=True)
+            series = series.rolling(self.N, min_periods=1).rank(pct=True)
         return series
 
 

From 11785c9e4ed3fd4f33e0f2c9776f488c45dfea88 Mon Sep 17 00:00:00 2001
From: booksword <qianyun210603@hotmail.com>
Date: Sat, 13 Aug 2022 09:46:16 +0800
Subject: [PATCH 3/8] remove useless import

---
 qlib/data/ops.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/qlib/data/ops.py b/qlib/data/ops.py
index 63aad7b3b25..a2805700aff 100644
--- a/qlib/data/ops.py
+++ b/qlib/data/ops.py
@@ -9,7 +9,6 @@
 import pandas as pd
 
 from typing import Union, List, Type
-from scipy.stats import percentileofscore
 from .base import Expression, ExpressionOps, Feature, PFeature
 from ..log import get_module_logger
 from ..utils import get_callable_kwargs

From b4f84082667ba1c44fe569a373eed48956028310 Mon Sep 17 00:00:00 2001
From: BookSword <qianyun210603@hotmail.com>
Date: Tue, 23 Aug 2022 16:19:17 +0800
Subject: [PATCH 4/8] require pandas 1.4+

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 0ca9f26ba97..ebd81bc2a57 100644
--- a/setup.py
+++ b/setup.py
@@ -49,7 +49,7 @@ def get_version(rel_path: str) -> str:
 # `estimator` may depend on other packages. In order to reduce dependencies, it is not written here.
 REQUIRED = [
     "numpy>=1.12.0",
-    "pandas>=0.25.1",
+    "pandas>=1.4.0",
     "scipy>=1.0.0",
     "requests>=2.18.0",
     "sacred>=0.7.4",

From 223f2c79163f1333609a3fab3ad07b3d25a73093 Mon Sep 17 00:00:00 2001
From: BookSword <qianyun210603@hotmail.com>
Date: Tue, 23 Aug 2022 18:52:25 +0800
Subject: [PATCH 5/8] rank for py37+pandas 1.3.5 compatibility

---
 qlib/data/ops.py | 25 +++++++++++++++++++++++--
 setup.py         |  2 +-
 2 files changed, 24 insertions(+), 3 deletions(-)

diff --git a/qlib/data/ops.py b/qlib/data/ops.py
index a2805700aff..93b3266a39e 100644
--- a/qlib/data/ops.py
+++ b/qlib/data/ops.py
@@ -9,6 +9,7 @@
 import pandas as pd
 
 from typing import Union, List, Type
+from scipy.stats import percentileofscore
 from .base import Expression, ExpressionOps, Feature, PFeature
 from ..log import get_module_logger
 from ..utils import get_callable_kwargs
@@ -1147,16 +1148,36 @@ class Rank(Rolling):
 
     def __init__(self, feature, N):
         super(Rank, self).__init__(feature, N, "rank")
+        major_version, minor_version, *_ = pd.__version__.split('.')
+        self._load_internal = self._load_internal_pd14 \
+            if int(major_version) > 1 or int(major_version) == 1 and \
+        int(minor_version) >3 else self._load_internal_pd_below_13
 
-    def _load_internal(self, instrument, start_index, end_index, *args):
+    def _load_internal_pd14(self, instrument, start_index, end_index, *args):
         series = self.feature.load(instrument, start_index, end_index, *args)
-
         if self.N == 0:
             series = series.expanding(min_periods=1).rank(pct=True)
         else:
             series = series.rolling(self.N, min_periods=1).rank(pct=True)
         return series
 
+    # for compatiblity of python 3.7, which doesn't support pandas 1.4.0+ which implements Rolling.rank
+    def _load_internal_pd_below_13(self, instrument, start_index, end_index, *args):
+        series = self.feature.load(instrument, start_index, end_index, *args)
+        def rank(x):
+            if np.isnan(x[-1]):
+                return np.nan
+            x1 = x[~np.isnan(x)]
+            if x1.shape[0] == 0:
+                return np.nan
+            return percentileofscore(x1, x1[-1]) / 100
+
+        if self.N == 0:
+            series = series.expanding(min_periods=1).apply(rank, raw=True)
+        else:
+            series = series.rolling(self.N, min_periods=1).apply(rank, raw=True)
+        return series
+
 
 class Count(Rolling):
     """Rolling Count
diff --git a/setup.py b/setup.py
index ebd81bc2a57..0ca9f26ba97 100644
--- a/setup.py
+++ b/setup.py
@@ -49,7 +49,7 @@ def get_version(rel_path: str) -> str:
 # `estimator` may depend on other packages. In order to reduce dependencies, it is not written here.
 REQUIRED = [
     "numpy>=1.12.0",
-    "pandas>=1.4.0",
+    "pandas>=0.25.1",
     "scipy>=1.0.0",
     "requests>=2.18.0",
     "sacred>=0.7.4",

From ad05334266f89f9ac8cf72828ac4e1e20af6bbd1 Mon Sep 17 00:00:00 2001
From: BookSword <qianyun210603@hotmail.com>
Date: Tue, 23 Aug 2022 19:02:03 +0800
Subject: [PATCH 6/8] lint improvement

---
 qlib/data/ops.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/qlib/data/ops.py b/qlib/data/ops.py
index 93b3266a39e..b1f4d41f192 100644
--- a/qlib/data/ops.py
+++ b/qlib/data/ops.py
@@ -1149,9 +1149,11 @@ class Rank(Rolling):
     def __init__(self, feature, N):
         super(Rank, self).__init__(feature, N, "rank")
         major_version, minor_version, *_ = pd.__version__.split('.')
-        self._load_internal = self._load_internal_pd14 \
-            if int(major_version) > 1 or int(major_version) == 1 and \
-        int(minor_version) >3 else self._load_internal_pd_below_13
+        self._load_internal = (
+            self._load_internal_pd14
+            if int(major_version) > 1 or int(major_version) == 1 and int(minor_version) > 3
+            else self._load_internal_pd_below_13
+        )
 
     def _load_internal_pd14(self, instrument, start_index, end_index, *args):
         series = self.feature.load(instrument, start_index, end_index, *args)
@@ -1164,6 +1166,7 @@ def _load_internal_pd14(self, instrument, start_index, end_index, *args):
     # for compatiblity of python 3.7, which doesn't support pandas 1.4.0+ which implements Rolling.rank
     def _load_internal_pd_below_13(self, instrument, start_index, end_index, *args):
         series = self.feature.load(instrument, start_index, end_index, *args)
+
         def rank(x):
             if np.isnan(x[-1]):
                 return np.nan

From 33ed6c357fd518d5f1dea172efece0d5fab08b08 Mon Sep 17 00:00:00 2001
From: BookSword <qianyun210603@hotmail.com>
Date: Tue, 23 Aug 2022 19:45:15 +0800
Subject: [PATCH 7/8] lint black fix

---
 qlib/data/ops.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/qlib/data/ops.py b/qlib/data/ops.py
index b1f4d41f192..cfc1ed9b1bd 100644
--- a/qlib/data/ops.py
+++ b/qlib/data/ops.py
@@ -1148,7 +1148,7 @@ class Rank(Rolling):
 
     def __init__(self, feature, N):
         super(Rank, self).__init__(feature, N, "rank")
-        major_version, minor_version, *_ = pd.__version__.split('.')
+        major_version, minor_version, *_ = pd.__version__.split(".")
         self._load_internal = (
             self._load_internal_pd14
             if int(major_version) > 1 or int(major_version) == 1 and int(minor_version) > 3

From ee68adccdc8c3b5a96c516847a65167083c55f9d Mon Sep 17 00:00:00 2001
From: BookSword <qianyun210603@hotmail.com>
Date: Tue, 30 Aug 2022 14:42:12 +0800
Subject: [PATCH 8/8] use hasattr instead of version to check whether
 rolling.rank is implemented

---
 qlib/data/ops.py | 26 ++++++--------------------
 1 file changed, 6 insertions(+), 20 deletions(-)

diff --git a/qlib/data/ops.py b/qlib/data/ops.py
index cfc1ed9b1bd..fe2ebc9f6d9 100644
--- a/qlib/data/ops.py
+++ b/qlib/data/ops.py
@@ -1148,25 +1148,15 @@ class Rank(Rolling):
 
     def __init__(self, feature, N):
         super(Rank, self).__init__(feature, N, "rank")
-        major_version, minor_version, *_ = pd.__version__.split(".")
-        self._load_internal = (
-            self._load_internal_pd14
-            if int(major_version) > 1 or int(major_version) == 1 and int(minor_version) > 3
-            else self._load_internal_pd_below_13
-        )
-
-    def _load_internal_pd14(self, instrument, start_index, end_index, *args):
-        series = self.feature.load(instrument, start_index, end_index, *args)
-        if self.N == 0:
-            series = series.expanding(min_periods=1).rank(pct=True)
-        else:
-            series = series.rolling(self.N, min_periods=1).rank(pct=True)
-        return series
 
     # for compatiblity of python 3.7, which doesn't support pandas 1.4.0+ which implements Rolling.rank
-    def _load_internal_pd_below_13(self, instrument, start_index, end_index, *args):
+    def _load_internal(self, instrument, start_index, end_index, *args):
         series = self.feature.load(instrument, start_index, end_index, *args)
 
+        rolling_or_expending = series.expanding(min_periods=1) if self.N == 0 else series.rolling(self.N, min_periods=1)
+        if hasattr(rolling_or_expending, "rank"):
+            return rolling_or_expending.rank(pct=True)
+
         def rank(x):
             if np.isnan(x[-1]):
                 return np.nan
@@ -1175,11 +1165,7 @@ def rank(x):
                 return np.nan
             return percentileofscore(x1, x1[-1]) / 100
 
-        if self.N == 0:
-            series = series.expanding(min_periods=1).apply(rank, raw=True)
-        else:
-            series = series.rolling(self.N, min_periods=1).apply(rank, raw=True)
-        return series
+        return rolling_or_expending.apply(rank, raw=True)
 
 
 class Count(Rolling):