df.loc[['row_name1', 'row_name2'], ['col1', 'col2', 'col3']]
return data frame 2 x 3
# Plain df[...] cannot take a (rows, columns) pair; label-based 2-D selection goes through .loc.
df.loc[:, 'col']
return series
# Plain df[...] cannot take a (rows, columns) pair; use .loc. A list of labels keeps the result 2-D.
df.loc[:, ['col']]
returns a data frame with all rows and 1 column (n_rows x 1)
df[['col']]
same as previous
df.loc[df['col1']>10, ['col1', 'col2']]
filtering rows with col1 > 10, show col1 and col2
# isin is a method: call it with a list of allowed values to get a boolean row mask.
df.loc[df['col1'].isin(['condition1', 'condition2'])]
# Python's boolean literal is capitalized (True); lowercase `true` is an undefined name.
df1.reset_index(inplace=True, drop=True)
after filtering, drop old index, create new index
df.sort_values(by=['col1', 'col2'], ascending=[True, False])
sort by col1 ascending, col2 descending
# The comparison must be applied to the column itself, so the bracket closes before `> 10`.
df['new_col'] = np.where(df['col1'] > 10, 'long', 'short')
create new_col: write 'long' where col1 > 10, otherwise 'short'
df['new_col'] = np.array(2000)
create a new col and fill with 2000
df.groupby('col1')['col2'].agg('mean')
group data by col1, calculate mean of col2
same as df.groupby('col1')['col2'].mean()
df.groupby(['col1', 'col2']).agg(['mean', 'sum'])
group data by col1, then by col2 within each col1 group (nested groups). calculate the mean and sum of every remaining column for each (col1, col2) group
df.groupby('col1').agg({'col2': ['mean'], 'col3': ['min', 'max']})
group data by col1; the result has one column with the mean of col2 and two more columns with the min and max of col3
[x*4 for x in list]
# apply is a DataFrame method; `pd` names the pandas module in these notes, so call it on the frame.
df.apply('mean')
calculate the mean of each column of the data frame
# select_dtypes is a DataFrame method (not a pandas module function); keep only numeric
# columns first so the row-wise mean (axis=1) does not hit non-numeric values.
df.select_dtypes(include=['int64', 'float64']).apply('mean', axis=1)
calculate the mean of each row of the data frame, ignoring non-numeric columns
import numpy as np
def bonus_func(col):
    """Flag which entries of ``col`` are greater than 10.

    Args:
        col: Values supporting element-wise ``> 10`` comparison, e.g. a
            NumPy array or a pandas Series.

    Returns:
        A NumPy boolean array: True where the entry is greater than 10,
        False otherwise.
    """
    # np.where turns the element-wise comparison into an explicit True/False array.
    bonus = np.where(col > 10, True, False)
    return bonus
sales['Jan_bonus'] = bonus_func(sales['Jan'])
sales
add a column to sales showing whether the bonus condition is met (Jan sales > 10) for each row
# The method is select_dtypes (plural); restrict to numeric columns before applying bonus_func per column.
bonus_frame = sales.select_dtypes(include=['int64', 'float64']).apply(bonus_func)
calculate bonus condition for each column
# axis=1 must be passed inside the concat call so the frames are joined column-wise.
pd.concat([sales, bonus_frame], axis=1)
append the bonus frame's columns to the right of sales (returns a new frame)
df.dropna()
drop rows with N/A
df.dropna(inplace = True)
drop rows with N/A in place, modifying the original frame
df.dropna(axis = 1)
drop columns with N/A
# The keyword is `thresh`: the minimum number of non-N/A values a row needs in order to be kept.
df.dropna(thresh=2)
keep only rows that have at least 2 non-N/A values (drop the rest)
df.fillna(0)
fill N/A with 0
No comments:
Post a Comment