It is often helpful to have a test dataframe. This function makes one with several optional variants. I use it surprisingly often. It is part of great2.
import numpy as np
import pandas as pd


def test_df(nrows: int = 10, ncols: int = 3, multi_index: bool = False) -> pd.DataFrame:
    """Make a dummy test dataframe filled with random floats.

    The data columns are labelled with consecutive capital letters starting
    at ``'A'`` and filled with draws from ``np.random.rand``. The column
    index is named ``'col_name'``.

    Parameters
    ----------
    nrows
        Number of rows.
    ncols
        Number of data columns; at most 26 because columns are labelled
        with single capital letters.
    multi_index
        If False (default) the frame has a default integer index named
        ``'idx_name'``. If True the frame has a two-level row index:
        ``'l1_name'`` holds the integers ``0 .. nrows - 1`` and
        ``'l2_name'`` holds randomly chosen capital letters.

    Returns
    -------
    pandas.DataFrame
        Frame of shape ``(nrows, ncols)``.

    Raises
    ------
    ValueError
        If ``ncols`` exceeds the number of available column labels.
    """
    colnames = list('ABCDEFGHIJKLMNOPQRSTUVWXYZ')
    # Explicit check rather than ``assert`` so it is not stripped under -O.
    if ncols > len(colnames):
        raise ValueError(
            f'ncols must be at most {len(colnames)}, got {ncols}'
        )

    df = pd.DataFrame(np.random.rand(nrows, ncols), columns=colnames[:ncols])
    df.columns.name = 'col_name'
    df.index.name = 'idx_name'

    if multi_index:
        # Build the index levels directly. Writing ints / strings into
        # float64 columns and then calling set_index upcasts the integer
        # level to float and triggers pandas' incompatible-dtype
        # FutureWarning (an error in later pandas versions).
        df.index = pd.MultiIndex.from_arrays(
            [np.arange(nrows), np.random.choice(colnames, nrows)],
            names=['l1_name', 'l2_name'],
        )
    return df


# The name starts with ``test_``; tell pytest this is a helper, not a test,
# so it is not collected when imported into a test module.
test_df.__test__ = False


if __name__ == '__main__':
    print(test_df())
    print('\nWith multiindex\n')
    print(test_df(multi_index=True))
col_name A B C
idx_name
0 0.475410 0.283504 0.201486
1 0.570923 0.518903 0.249603
2 0.108568 0.804149 0.431516
3 0.605882 0.937283 0.203796
4 0.135818 0.315772 0.306703
5 0.151252 0.515751 0.739565
6 0.936325 0.937864 0.620420
7 0.481101 0.999679 0.171918
8 0.879256 0.063128 0.418288
9 0.547664 0.855129 0.552027
With multiindex
col_name A B C
l1_name l2_name
0.0 Q 0.291544 0.745871 0.044753
1.0 V 0.573363 0.223773 0.810040
2.0 I 0.514169 0.386114 0.738583
3.0 F 0.723874 0.204980 0.544518
4.0 V 0.205949 0.149275 0.338569
5.0 J 0.602579 0.457508 0.445222
6.0 K 0.291321 0.377925 0.235798
7.0 I 0.193860 0.872993 0.047143
8.0 K 0.312000 0.742774 0.046326
9.0 O 0.684348 0.535367 0.058009
C:\Users\steve\AppData\Local\Temp\ipykernel_2744\3455968740.py:19: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value '['Q' 'V' 'I' 'F' 'V' 'J' 'K' 'I' 'K' 'O']' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.
df.iloc[:, -1] = np.random.choice(colnames, nrows)