import pandas as pd

# Income per person, inflation adjusted (US $ 2000). Source: World Bank.
# One column per country, one row per decade from 1960 to 2010.
incomes = pd.DataFrame(
    dict(
        Argentina=[5251, 6611, 7540, 5581, 7695, 10749],
        Egypt=[430, 565, 856, 1153, 1475, 1975],
        France=[7499, 11572, 15641, 18731, 21774, 22758],
        Sweden=[11360, 16570, 19470, 23492, 27869, 32631],
        USA=[13723, 18228, 22630, 28298, 35081, 37329],
    ),
    index=list(range(1960, 2011, 10)),
)

# Children per woman.
# One column per country, one row per decade from 1960 to 2010.
children = pd.DataFrame(
    dict(
        Argentina=[3.11, 3.07, 3.33, 2.99, 2.48, 2.22],
        Egypt=[6.63, 5.94, 5.37, 4.35, 3.31, 2.88],
        France=[2.77, 2.49, 1.83, 1.75, 1.82, 1.98],
        Sweden=[2.2, 1.92, 1.68, 2.14, 1.56, 1.99],
        USA=[3.67, 2.46, 1.82, 2.07, 2.05, 1.93],
    ),
    index=list(range(1960, 2011, 10)),
)

# concat is explained in lesson "Pandas 6 -- operations with 2 dataframes"
# Each key labels the rows of its DataFrame, giving a two-level row index:
# the key first, then the year.
df = pd.concat([incomes, children], keys=['incomes', 'children'])
df

# The row index is a MultiIndex made of (key, year) tuples
df.index

MultiIndex([( 'incomes', 1960),
            ( 'incomes', 1970),
            ( 'incomes', 1980),
            ( 'incomes', 1990),
            ( 'incomes', 2000),
            ( 'incomes', 2010),
            ('children', 1960),
            ('children', 1970),
            ('children', 1980),
            ('children', 1990),
            ('children', 2000),
            ('children', 2010)],
           )

# Selecting one column gives a Series that keeps the two-level row index
df.Egypt

incomes   1960     430.00
          1970     565.00
          1980     856.00
          1990    1153.00
          2000    1475.00
          2010    1975.00
children  1960       6.63
          1970       5.94
          1980       5.37
          1990       4.35
          2000       3.31
          2010       2.88
Name: Egypt, dtype: float64

# A first-level label on that Series returns the matching sub-Series, indexed by year
df.Egypt['incomes']

1960     430.0
1970     565.0
1980     856.0
1990    1153.0
2000    1475.0
2010    1975.0
Name: Egypt, dtype: float64

# Two ways to reach a single value in a multi-indexed Series:
# chained lookups (first line) or both level labels in one lookup (second line)
print("Children per women in the USA in 1970 = ", df.USA['children'][1970])
print("Children per women in the USA in 1990 = ", df.USA['children',1990])

Children per women in the USA in 1970 =  2.46
Children per women in the USA in 1990 =  2.07

# The ':' keeps every first-level label, 2000 picks the year:
# the result has one value per first-level label
display('France data in 2000:',                   df['France'][:, 2000])

'France data in 2000:'

incomes     21774.00
children        1.82
Name: France, dtype: float64

# One first-level label combined with a list of second-level labels
df.loc[('incomes', [1960,2010]), :]

# A single first-level label selects the rows stored under that label
df.loc['children']

# Slice between two full index tuples and between two column labels
# (both ends of a label slice are included)
df.loc[('incomes',1970):('incomes', 2000), 'Egypt':'Sweden']

# slice(None) stands for "every label" of the first level
df.loc[(slice(None), 1960), :]

# pd.IndexSlice allows the usual colon slice syntax for each level inside .loc
df.loc[pd.IndexSlice[:, 1990:2010], :]

# After transposing, the two-level index sits on the columns and the countries become the rows
df.T  # transpose

# A column of the transposed frame is reached with its two labels: first level, then year
df.T.children[1970]

Argentina    3.07
Egypt        5.94
France       2.49
Sweden       1.92
USA          2.46
Name: 1970, dtype: float64

# Turn the second index level (the years) into an ordinary column
df2 = df.reset_index(level=1)
print(df2.columns)  # the new column receives the default name 'level_1'
# Give that column a meaningful name
df2 = df2.rename(columns={'level_1': 'year'})
df2

Index(['level_1', 'Argentina', 'Egypt', 'France', 'Sweden', 'USA'], dtype='object')

# Move the 'year' column back into the row index, as an additional level
df2.set_index('year', append=True)  # without append it would remove the previous index (try it!)

# Let's use a smaller DataFrame: both first-level labels, three years, two countries
wanted_rows = (['incomes', 'children'], [1970, 1990, 2010])
wanted_countries = ['Argentina', 'Sweden']
df_small = df.loc[wanted_rows, wanted_countries]
df_small

# stack() moves the column labels (the countries) into the row index as a new innermost
# level; the result is a Series
df_small.stack()

incomes   1970  Argentina     6611.00
                Sweden       16570.00
          1990  Argentina     5581.00
                Sweden       23492.00
          2010  Argentina    10749.00
                Sweden       32631.00
children  1970  Argentina        3.07
                Sweden           1.92
          1990  Argentina        2.99
                Sweden           2.14
          2010  Argentina        2.22
                Sweden           1.99
dtype: float64

# unstack() is the reverse of stack(): it moves the innermost row level (the years)
# into the columns
df_small.unstack()

# Unstacking a second time moves the remaining row level as well, leaving a Series
# indexed by country, year and first-level label
df_small.unstack().unstack()

Argentina  1970  incomes      6611.00
                 children        3.07
           1990  incomes      5581.00
                 children        2.99
           2010  incomes     10749.00
                 children        2.22
Sweden     1970  incomes     16570.00
                 children        1.92
           1990  incomes     23492.00
                 children        2.14
           2010  incomes     32631.00
                 children        1.99
dtype: float64

# Put the years first in the row index, then sort the rows according to the new level order
df_small.reorder_levels([1,0], axis='index').sort_index()  # swaplevel(0,1) would have done the same

# Keep the stacked version in a variable: a Series with three index levels
dfss = df_small.stack()
dfss

incomes   1970  Argentina     6611.00
                Sweden       16570.00
          1990  Argentina     5581.00
                Sweden       23492.00
          2010  Argentina    10749.00
                Sweden       32631.00
children  1970  Argentina        3.07
                Sweden           1.92
          1990  Argentina        2.99
                Sweden           2.14
          2010  Argentina        2.22
                Sweden           1.99
dtype: float64

# Exchange the first two index levels; the rows themselves stay in their current order
dfss.swaplevel(0,1)

1970  incomes   Argentina     6611.00
                Sweden       16570.00
1990  incomes   Argentina     5581.00
                Sweden       23492.00
2010  incomes   Argentina    10749.00
                Sweden       32631.00
1970  children  Argentina        3.07
                Sweden           1.92
1990  children  Argentina        2.99
                Sweden           2.14
2010  children  Argentina        2.22
                Sweden           1.99
dtype: float64

# Reverse the order of the three index levels, then sort the rows so equal labels are grouped
dfss.reorder_levels([2,1,0]).sort_index()

Argentina  1970  children        3.07
                 incomes      6611.00
           1990  children        2.99
                 incomes      5581.00
           2010  children        2.22
                 incomes     10749.00
Sweden     1970  children        1.92
                 incomes     16570.00
           1990  children        2.14
                 incomes     23492.00
           2010  children        1.99
                 incomes     32631.00
dtype: float64

# Rename

# Translate the first-level labels with an explicit old -> new mapping.
# set_levels() with a bare list pairs the new names with the level values by
# position, so the result depends on the internal order of those values and
# can attach 'enfants' (children) to the income rows. A mapping cannot be
# misaligned. rename() returns a new DataFrame, so df is left unchanged.
df2 = df.rename(index={'incomes': 'revenus', 'children': 'enfants'}, level=0)
df2

# Drop a level

# droplevel(1) removes the second index level (the years) and keeps the first one
df2.droplevel(1)

Multi-index for more dimensions

Retrieve the data

Extracting a block

All the elements of an index

Multi-columns

Column to index and vice versa

Stack & Unstack

Direction

Direct operations on multi-indexes

		Argentina	Egypt	France	Sweden	USA
incomes	1960	5251.00	430.00	7499.00	11360.00	13723.00
	1970	6611.00	565.00	11572.00	16570.00	18228.00
	1980	7540.00	856.00	15641.00	19470.00	22630.00
	1990	5581.00	1153.00	18731.00	23492.00	28298.00
	2000	7695.00	1475.00	21774.00	27869.00	35081.00
	2010	10749.00	1975.00	22758.00	32631.00	37329.00
children	1960	3.11	6.63	2.77	2.20	3.67
	1970	3.07	5.94	2.49	1.92	2.46
	1980	3.33	5.37	1.83	1.68	1.82
	1990	2.99	4.35	1.75	2.14	2.07
	2000	2.48	3.31	1.82	1.56	2.05
	2010	2.22	2.88	1.98	1.99	1.93

		Argentina	Egypt	France	Sweden	USA
incomes	1960	5251.0	430.0	7499.0	11360.0	13723.0
incomes	2010	10749.0	1975.0	22758.0	32631.0	37329.0

		Egypt	France	Sweden
incomes	1970	565.0	11572.0	16570.0
	1980	856.0	15641.0	19470.0
	1990	1153.0	18731.0	23492.0
	2000	1475.0	21774.0	27869.0

		Argentina	Egypt	France	Sweden	USA
	year
incomes	1960	5251.00	430.00	7499.00	11360.00	13723.00
	1970	6611.00	565.00	11572.00	16570.00	18228.00
	1980	7540.00	856.00	15641.00	19470.00	22630.00
	1990	5581.00	1153.00	18731.00	23492.00	28298.00
	2000	7695.00	1475.00	21774.00	27869.00	35081.00
	2010	10749.00	1975.00	22758.00	32631.00	37329.00
children	1960	3.11	6.63	2.77	2.20	3.67
	1970	3.07	5.94	2.49	1.92	2.46
	1980	3.33	5.37	1.83	1.68	1.82
	1990	2.99	4.35	1.75	2.14	2.07
	2000	2.48	3.31	1.82	1.56	2.05
	2010	2.22	2.88	1.98	1.99	1.93

		Argentina	Egypt	France	Sweden	USA
enfants	1960	5251.00	430.00	7499.00	11360.00	13723.00
	1970	6611.00	565.00	11572.00	16570.00	18228.00
	1980	7540.00	856.00	15641.00	19470.00	22630.00
	1990	5581.00	1153.00	18731.00	23492.00	28298.00
	2000	7695.00	1475.00	21774.00	27869.00	35081.00
	2010	10749.00	1975.00	22758.00	32631.00	37329.00
revenus	1960	3.11	6.63	2.77	2.20	3.67
	1970	3.07	5.94	2.49	1.92	2.46
	1980	3.33	5.37	1.83	1.68	1.82
	1990	2.99	4.35	1.75	2.14	2.07
	2000	2.48	3.31	1.82	1.56	2.05
	2010	2.22	2.88	1.98	1.99	1.93