Calculations with Pandas¶
The main contribution of the Pandas library is its table structure, the DataFrame. But like NumPy, it also offers many methods for performing calculations on it.
import numpy as np
import pandas as pd
df = pd.DataFrame(np.random.randn(1000, 4),  # a 1000 x 4 array of random daily variations
                  columns=['A', 'B', 'C', 'D'],
                  index=pd.date_range('1/1/2000', periods=1000))
df = df.cumsum()  # cumulative sum, to get something like a regular variation (temperature, stock price...)
df
|            | A         | B         | C          | D         |
|------------|-----------|-----------|------------|-----------|
| 2000-01-01 | -0.158289 | 1.461346  | 0.391050   | -0.600887 |
| 2000-01-02 | 0.228089  | 0.497870  | 1.717566   | -1.410468 |
| 2000-01-03 | 0.046438  | 1.639831  | 1.958727   | -1.200243 |
| 2000-01-04 | -0.158604 | 1.389162  | 3.622063   | -1.991218 |
| 2000-01-05 | -0.261653 | 3.026403  | 3.543351   | -1.185131 |
| ...        | ...       | ...       | ...        | ...       |
| 2002-09-22 | -7.221154 | -1.085731 | -32.496946 | -0.452474 |
| 2002-09-23 | -7.510201 | 0.336649  | -33.244903 | 0.817182  |
| 2002-09-24 | -7.225584 | 1.035569  | -32.951018 | 0.669745  |
| 2002-09-25 | -7.462259 | 0.575563  | -34.578246 | 2.074575  |
| 2002-09-26 | -7.245574 | -0.330140 | -35.658318 | 2.725504  |

1000 rows × 4 columns
When you want to see an entire dataframe (without the ...), you have to modify these Pandas display options:
pd.options.display.max_columns = None
pd.options.display.max_rows = None
Of course, you can put a number instead of None.
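For example, here is a quick sketch: pd.option_context is a standard Pandas helper that changes an option only inside a with block, then restores it automatically (df is the dataframe built above).

pd.options.display.max_rows = 10          # show at most 10 rows from now on

with pd.option_context('display.max_rows', None):
    print(df)                             # prints the entire dataframe; the option is restored on exit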
Modifying data¶
We have seen that it is possible to perform arithmetic operations on dataframes and their columns, just as we do with NumPy arrays.
df['M'] = (df.A + df.B + df.C + df.D) / 4
df.head()
|            | A         | B        | C        | D         | M        |
|------------|-----------|----------|----------|-----------|----------|
| 2000-01-01 | -0.158289 | 1.461346 | 0.391050 | -0.600887 | 0.273305 |
| 2000-01-02 | 0.228089  | 0.497870 | 1.717566 | -1.410468 | 0.258265 |
| 2000-01-03 | 0.046438  | 1.639831 | 1.958727 | -1.200243 | 0.611188 |
| 2000-01-04 | -0.158604 | 1.389162 | 3.622063 | -1.991218 | 0.715351 |
| 2000-01-05 | -0.261653 | 3.026403 | 3.543351 | -1.185131 | 1.280742 |
Modifying a column amounts to putting it on the left of the equals sign: df.A += 2.
You can use the usual NumPy functions like sum, prod, cumsum, cumprod, gradient, diff, fft... (the statistical operations are detailed below).
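For instance, a minimal sketch on the dataframe above (the column names A, B and D come from it):

df.A += 2           # shift every value of column A by 2
np.cumsum(df.B)     # NumPy functions accept a column (Series) and return one
df.D.diff()         # day-to-day differences of column D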
del df['M']  # remove the M column to come back to the initial dataframe
df['M'] = df.mean(axis=1)
df.head()
|            | A         | B        | C        | D         | M        |
|------------|-----------|----------|----------|-----------|----------|
| 2000-01-01 | -0.158289 | 1.461346 | 0.391050 | -0.600887 | 0.273305 |
| 2000-01-02 | 0.228089  | 0.497870 | 1.717566 | -1.410468 | 0.258265 |
| 2000-01-03 | 0.046438  | 1.639831 | 1.958727 | -1.200243 | 0.611188 |
| 2000-01-04 | -0.158604 | 1.389162 | 3.622063 | -1.991218 | 0.715351 |
| 2000-01-05 | -0.261653 | 3.026403 | 3.543351 | -1.185131 | 1.280742 |
Apply a function¶
You can also apply a vector function to the table, which amounts to applying it to each of its columns one by one (or to each of its rows with axis=1).
df['AB'] = df.apply(lambda l: l.iloc[0] if l.iloc[0] > 0 else l.iloc[1], axis=1)
df.head()
|            | A         | B        | C        | D         | M        | AB       |
|------------|-----------|----------|----------|-----------|----------|----------|
| 2000-01-01 | -0.158289 | 1.461346 | 0.391050 | -0.600887 | 0.273305 | 1.461346 |
| 2000-01-02 | 0.228089  | 0.497870 | 1.717566 | -1.410468 | 0.258265 | 0.228089 |
| 2000-01-03 | 0.046438  | 1.639831 | 1.958727 | -1.200243 | 0.611188 | 0.046438 |
| 2000-01-04 | -0.158604 | 1.389162 | 3.622063 | -1.991218 | 0.715351 | 1.389162 |
| 2000-01-05 | -0.261653 | 3.026403 | 3.543351 | -1.185131 | 1.280742 | 3.026403 |
def mean_deviation(w):
    try:
        return np.abs(w - w.mean()).mean()  # mean absolute deviation from the column mean
    except TypeError:                       # non-numeric column
        return np.nan
df.apply(mean_deviation)
A      5.744641
B      3.261884
C     14.439678
D      7.034502
M      4.304306
AB     2.993275
dtype: float64
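The same computation can also be written without apply, as a vectorized one-liner; a sketch assuming, as here, that all columns are numeric:

(df - df.mean()).abs().mean()  # mean absolute deviation of each column, in one expression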
Table statistics¶
The method that sums it all up is describe.
df.describe() # get statistics
|       | A           | B           | C           | D           | M           | AB          |
|-------|-------------|-------------|-------------|-------------|-------------|-------------|
| count | 1000.000000 | 1000.000000 | 1000.000000 | 1000.000000 | 1000.000000 | 1000.000000 |
| mean  | -9.206839   | 2.912079    | -14.842406  | 11.441330   | -2.423959   | 2.805616    |
| std   | 7.278487    | 4.195483    | 16.302703   | 8.914450    | 5.043540    | 3.895437    |
| min   | -27.937872  | -8.017698   | -43.343807  | -4.575464   | -12.912442  | -8.017698   |
| 25%   | -13.730769  | 0.325002    | -29.575716  | 5.694189    | -7.197366   | 0.544945    |
| 50%   | -8.626063   | 2.909592    | -15.464579  | 8.665304    | -1.293571   | 2.642403    |
| 75%   | -4.553952   | 5.557152    | -0.789050   | 15.794403   | 1.179606    | 5.303396    |
| max   | 9.312030    | 16.468624   | 15.788386   | 36.098498   | 8.417791    | 16.468624   |
In more detail, Pandas provides the following methods, which the reader can test (a short sketch follows the table):

| Method     | Description                                   |
|------------|-----------------------------------------------|
| count()    | Number of non-null observations               |
| sum()      | Sum of values                                 |
| mean()     | Mean of values                                |
| median()   | Median of values                              |
| min()      | Minimum                                       |
| max()      | Maximum                                       |
| std()      | Bessel-corrected sample standard deviation    |
| var()      | Unbiased variance                             |
| skew()     | Sample skewness (3rd moment)                  |
| kurt()     | Sample kurtosis (4th moment)                  |
| quantile() | Sample quantile (value at a given fraction)   |
| cov()      | Unbiased covariance (pairwise, between two columns) |
| corr()     | Correlation (pairwise, between two columns)   |
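A minimal sketch of a few of them, on the dataframe built above:

df.median()            # median of each column
df.std()               # sample standard deviation of each column (ddof=1)
df[['A', 'B']].corr()  # 2 x 2 correlation matrix between columns A and B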
df.quantile(0.1)
A    -20.125984
B     -2.384347
C    -36.765956
D      2.585649
M     -9.772580
AB    -2.060023
Name: 0.1, dtype: float64
Pandas profiling¶
There is also the Pandas Profiling library, which performs a more complete statistical analysis of a dataset (see this example).
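As a sketch of typical usage (note: the package has since been renamed ydata-profiling, and ProfileReport is its documented entry point):

from ydata_profiling import ProfileReport  # pip install ydata-profiling

profile = ProfileReport(df, title="Random walk dataframe")  # run the full analysis
profile.to_file("report.html")                              # write a standalone HTML report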