chapter7-Pandas数据分析实战.zip

2301_81435636chapter7-Pandas数据分析实战.zip 82.95KB

资源文件列表:

chapter7-Pandas数据分析实战.zip 大约有16个文件

chapter7-Pandas数据分析实战/7.1.1series.py 2.75KB
chapter7-Pandas数据分析实战/7.1.2DateTimeIndex.py 3.02KB
chapter7-Pandas数据分析实战/7.1.3 two-dimensional-DateFrame.py 1.54KB
chapter7-Pandas数据分析实战/7.11-7.12 resample-sort_index.py 2.57KB
chapter7-Pandas数据分析实战/7.2.10 pivot-crosstab.py 2.52KB
chapter7-Pandas数据分析实战/7.2.13 std-cov.py 1.38KB
chapter7-Pandas数据分析实战/7.2.14 assessors.py 1.79KB
chapter7-Pandas数据分析实战/7.2.15 figure.py 1.43KB
chapter7-Pandas数据分析实战/7.2.17 merge.py 1.7KB
chapter7-Pandas数据分析实战/7.2.1-7.2.4 read-dataset-from-excel.py 5.36KB
chapter7-Pandas数据分析实战/7.2.5summary-task.py 2.71KB
chapter7-Pandas数据分析实战/7.2.6-7.2.8 deal-with-unnormal-value.py 3.92KB
chapter7-Pandas数据分析实战/7.2.9 data-difference.py 998B
chapter7-Pandas数据分析实战/as_index.py 189B
chapter7-Pandas数据分析实战/超市营业额2.xlsx 19.94KB
chapter7-Pandas数据分析实战/第7章+pandas数据分析实战.ipynb 143.44KB

资源介绍:

chapter7-Pandas数据分析实战.zip

# -*- coding: utf-8 -*-
"""
Created on Thu Nov 14 16:30:11 2024

@author: Hou-Liu

Sections 7.2.1-7.2.4: read a dataset from an Excel workbook, select rows and
columns, inspect summary statistics, and sort the data.
"""
import pandas as pd

# Workbook that every example below reads from.
DATA_FILE = '超市营业额2.xlsx'

# Align columns correctly when the output contains East Asian wide characters.
pd.set_option('display.unicode.ambiguous_as_wide', True)
pd.set_option('display.unicode.east_asian_width', True)


def banner(title, width=20):
    """Return *title* left-justified and padded with '=' to *width* characters."""
    return title.ljust(width, '=')


# ---------------------------------------------------------------------------
# 7.2.1 Read data from an Excel file
# ---------------------------------------------------------------------------

# Read only these four columns, using the default integer index.
# If the column types are known in advance, passing
# dtype={'工号': str, '姓名': str, '交易额': float} can speed up loading
# (usecols only selects columns; dtype is the parameter that sets types).
df = pd.read_excel(DATA_FILE, usecols=['工号', '姓名', '时段', '交易额'])
# Show the first 10 rows.
print(df[:10], end='\n\n')

# Read every column of the first worksheet, skipping the rows at 0-based
# positions 1, 3 and 5, and use the column at position 1 as the row index.
df = pd.read_excel(DATA_FILE, skiprows=[1, 3, 5], index_col=1)
print(df[:10])

# ---------------------------------------------------------------------------
# 7.2.2 Select data that satisfies given conditions
# ---------------------------------------------------------------------------

# Read all data with the default index. Nothing below modifies df, so the
# same frame is reused by sections 7.2.3 and 7.2.4 instead of re-reading it.
df = pd.read_excel(DATA_FILE)

# Row slicing by position is half-open [start, stop); slicing by index
# label is closed on both ends.
print(banner('下标在[5,10]区间的的行'), df[5:11], sep='\n')

# iloc and iat index by integer position.
# Equivalent to print(df.iloc[5, :]).
print(banner('索引为5的行'), df.iloc[5], sep='\n')

# Value at row position 3, column position 2.
print(banner('行下标为3、列下标为2的值'))
print(df.iloc[3, 2])
print(df.iat[3, 2])

print(banner('下标为[3,5,10]的行'))
print(df.iloc[[3, 5, 10], :])

print(banner('行下标为[3,5,10]，列下标为[0,1,4]', 30))
print(df.iloc[[3, 5, 10], [0, 1, 4]])

print(banner('查看指定的列前5行数据'))
print(df['交易额'][:5])
print(df[['姓名', '时段', '交易额']][:5])

print(banner('只查看前10行指定的列'))
print(df[:10][['姓名', '日期', '柜台']])

print(banner('下标为[3,5,10]的行的指定列'))
# loc and at index by label.
print(df.loc[[3, 5, 10], ['姓名', '交易额']])

print(banner('行下标为3,姓名列的值'), df.at[3, '姓名'], sep='\n')

print(banner('交易额高于1700元的数据'), df[df['交易额'] > 1700], sep='\n')

print(banner('交易总额'), df['交易额'].sum(), sep='\n')

print(banner('下午班的交易总额'))
print(df[df['时段'] == '14:00~21:00']['交易额'].sum())
# Equivalent form using a single .loc lookup.
print(df.loc[df['时段'] == '14:00~21:00', '交易额'].sum())

print(banner('张三下午班的交易情况'))
print(df[(df.姓名 == '张三') & (df.时段 == '14:00~21:00')][:10])

print(banner('日用品柜台销售总额'))
print(df[df['柜台'] == '日用品']['交易额'].sum())

print(banner('张三和李四二人销售总额'))
print(df[df['姓名'].isin(['张三', '李四'])]['交易额'].sum())

print(banner('交易额在指定范围内的记录'))
print(df[df['交易额'].between(800, 850)])

# ---------------------------------------------------------------------------
# 7.2.3 Inspect data characteristics and summary statistics
# ---------------------------------------------------------------------------

print(banner('查看交易额统计信息'), df['交易额'].describe(), sep='\n')

print(banner('交易额四分位数'))
# Minimum, first quartile, median, third quartile and maximum.
print(df['交易额'].quantile([0, 0.25, 0.5, 0.75, 1.0]))

print(banner('交易额中值'), df['交易额'].median(), sep='\n')

print(banner('交易额最小的三条记录'), df.nsmallest(3, '交易额'), sep='\n')

print(banner('交易额最大的5条记录'), df.nlargest(5, '交易额'), sep='\n')

print(banner('最后一个日期'), df['日期'].max(), sep='\n')

print(banner('最小的工号'), df['工号'].min(), sep='\n')

print(banner('第一个最小交易额的行下标'))
index = df['交易额'].idxmin()
print(index)
print(banner('第一个最小交易额'))
# Both expressions print the same minimum value, looked up two ways.
print(df.loc[index, '交易额'], df.交易额.min())

print(banner('第一个最大交易额的行下标'))
index = df['交易额'].idxmax()
print(index)
print(banner('第一个最大交易额'))
# Both expressions print the same maximum value, looked up two ways.
print(df.loc[index, '交易额'], df.交易额.max())

# ---------------------------------------------------------------------------
# 7.2.4 Sort the data in different ways
# ---------------------------------------------------------------------------

print(banner('按交易额和工号降序排序'))
print(df.sort_values(by=['交易额', '工号'], ascending=False)[:12])

print(banner('按交易额降序、工号升序排序'))
print(df.sort_values(by=['交易额', '工号'], ascending=[False, True])[:12])

print(banner('按工号升序排序'))
print(df.sort_values(by='工号', na_position='last')[:10])

print(banner('按列名汉字Unicode编码升序排序'))
# Note: the column labels are ordered by the Unicode code points of the
# Chinese characters, not by pronunciation.
print(df.sort_index(axis=1, ascending=True)[:10])

print(banner('按姓名汉字拼音升序排序'))
# NOTE: sorting by pinyin needs the third-party ``pypinyin`` package, so the
# example is left disabled and only the heading above is printed:
# from copy import deepcopy
# from pypinyin import pinyin
# dff = deepcopy(df)
# dff['拼音'] = dff.姓名.map(pinyin)
# print(dff.sort_values(by='拼音').drop('拼音', axis=1)[:10])