python基礎知識之索引與切片詳解

2022-05-14 13:01:15

基本索引

In [4]: sentence = 'You are a nice girl'

In [5]: L = sentence.split()

In [6]: L
Out[6]: ['You', 'are', 'a', 'nice', 'girl']

# 從0開始索引
In [7]: L[2]
Out[7]: 'a'

# 負數索引，從列表右側開始計數
In [8]: L[-2]
Out[8]: 'nice'

# -1表示列表最後一項
In [9]: L[-1]
Out[9]: 'girl'

# 當正整數索引超過範圍時
In [10]: L[100]
---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
<ipython-input-10-78da2f882365> in <module>()
----> 1 L[100]

IndexError: list index out of range

# 當負整數索引超過範圍時
In [11]: L[-100]
---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
<ipython-input-11-46b47b0ecb55> in <module>()
----> 1 L[-100]

IndexError: list index out of range

# slice 索引
In [193]: sl = slice(0,-1,1)

In [194]: L[sl]
Out[194]: ['You', 'are', 'a', 'nice']

In [199]: sl = slice(0,100)

In [200]: L[sl]
Out[200]: ['You', 'are', 'a', 'nice', 'girl']

巢狀索引

In [14]: L = [[1,2,3],{'I':'You are a nice girl','She':'Thank you!'},(11,22),'My name is Kyles']

In [15]: L
Out[15]:
[[1, 2, 3],
 {'I': 'You are a nice girl', 'She': 'Thank you!'},
 (11, 22),
 'My name is Kyles']

# 索引第1項，索引為0
In [16]: L[0]
Out[16]: [1, 2, 3]

# 索引第1項的第2子項
In [17]: L[0][1]
Out[17]: 2

# 索引第2項詞典
In [18]: L[1]
Out[18]: {'I': 'You are a nice girl', 'She': 'Thank you!'}

# 索引第2項詞典的 「She」
In [19]: L[1]['She']
Out[19]: 'Thank you!'

# 索引第3項
In [20]: L[2]
Out[20]: (11, 22)

# 索引第3項（元組）的第一個元素
In [22]: L[2][0]
Out[22]: 11

# 索引第4項
In [23]: L[3]
Out[23]: 'My name is Kyles'

# 索引第4項，前3個字元
In [24]: L[3][:3]
Out[24]: 'My '

切片

# 切片選擇,從1到列表末尾
In [13]: L[1:]
Out[13]: ['are', 'a', 'nice', 'girl']

# 負數索引，選取列表後兩項
In [28]: L[-2:]
Out[28]: ['nice', 'girl']

# 異常測試,這裡沒有報錯！
In [29]: L[-100:]
Out[29]: ['You', 'are', 'a', 'nice', 'girl']

# 返回空
In [30]: L[-100:-200]
Out[30]: []

# 正向索引
In [32]: L[-100:3]
Out[32]: ['You', 'are', 'a']

# 返回空
In [33]: L[-1:3]
Out[33]: []

# 返回空
In [41]: L[0:0]
Out[41]: []

看似簡單的索引，有的人不以為然。我們這裡採用的是明確的數字索引，很容易排查錯誤；但若索引是經過計算得出的變數，就千萬要小心了，否則失之毫釐、差之千里。

numpy.array 索引一維

In [34]: import numpy as np

In [35]: arr = np.arange(10)

In [36]: arr
Out[36]: array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [40]: arr.shape
Out[40]: (10,)

# [0,1)
In [37]: arr[0:1]
Out[37]: array([0])

# [0,0)
In [38]: arr[0:0]
Out[38]: array([], dtype=int32)

# 右側超出範圍之後
In [42]: arr[:1000]
Out[42]: array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

# 左側超出之後
In [43]: arr[-100:1000]
Out[43]: array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

# 兩側都超出
In [44]: arr[100:101]
Out[44]: array([], dtype=int32)

# 左側超出範圍，右側為負數索引（結果不為空）
In [45]: arr[-100:-2]
Out[45]: array([0, 1, 2, 3, 4, 5, 6, 7])

# []
In [46]: arr[-100:-50]
Out[46]: array([], dtype=int32)

numpy.array 索引二維

In [49]: arr = np.arange(15).reshape(3,5)

In [50]: arr
Out[50]:
array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [51]: arr.shape
Out[51]: (3, 5)

# axis = 0 增長的方向
In [52]: arr[0]
Out[52]: array([0, 1, 2, 3, 4])

# 選取第2行
In [53]: arr[1]
Out[53]: array([5, 6, 7, 8, 9])

# axis = 1 增長的方向，選取每一行的第1列
In [54]: arr[:,0]
Out[54]: array([ 0,  5, 10])

# axis = 1 增長的方向，選取每一行的第2列
In [55]: arr[:,1]
Out[55]: array([ 1,  6, 11])


# 選取每一行的第1,2列
In [56]: arr[:,0:2]
Out[56]:
array([[ 0,  1],
       [ 5,  6],
       [10, 11]])

# 右側超出範圍之後
In [57]: arr[:,0:100]
Out[57]:
array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

# 左側超出範圍之後
In [62]: arr[:,-10:2]
Out[62]:
array([[ 0,  1],
       [ 5,  6],
       [10, 11]])

# []
In [58]: arr[:,0:0]
Out[58]: array([], shape=(3, 0), dtype=int32)

# []
In [59]: arr[0:0,0:1]
Out[59]: array([], shape=(0, 1), dtype=int32)

# 異常
In [63]: arr[:,-10]
---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
<ipython-input-63-2ffa6627dc7f> in <module>()
----> 1 arr[:,-10]

IndexError: index -10 is out of bounds for axis 1 with size 5

numpy.array 索引三維…N維

In [67]: import numpy as np

In [68]: arr = np.arange(30).reshape(2,3,5)

In [69]: arr
Out[69]:
array([[[ 0,  1,  2,  3,  4],
        [ 5,  6,  7,  8,  9],
        [10, 11, 12, 13, 14]],

       [[15, 16, 17, 18, 19],
        [20, 21, 22, 23, 24],
        [25, 26, 27, 28, 29]]])

# 根據 axis = 0 選取
In [70]: arr[0]
Out[70]:
array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [71]: arr[1]
Out[71]:
array([[15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24],
       [25, 26, 27, 28, 29]])

# 根據 axis = 1 選取
In [72]: arr[:,0]
Out[72]:
array([[ 0,  1,  2,  3,  4],
       [15, 16, 17, 18, 19]])

In [73]: arr[:,1]
Out[73]:
array([[ 5,  6,  7,  8,  9],
       [20, 21, 22, 23, 24]])

# 異常指出 axis = 1 超出範圍
In [74]: arr[:,4]
---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
<ipython-input-74-9d489478e7c7> in <module>()
----> 1 arr[:,4]

IndexError: index 4 is out of bounds for axis 1 with size 3

# 根據 axis = 2 選取
In [75]: arr[:,:,0]
Out[75]:
array([[ 0,  5, 10],
       [15, 20, 25]])

# 降維
In [76]: arr[:,:,0].shape
Out[76]: (2, 3)

In [78]: arr[:,:,0:2]
Out[78]:
array([[[ 0,  1],
        [ 5,  6],
        [10, 11]],

       [[15, 16],
        [20, 21],
        [25, 26]]])

In [79]: arr[:,:,0:2].shape
Out[79]: (2, 3, 2)

# 左/右側超出範圍
In [81]: arr[:,:,0:100]
Out[81]:
array([[[ 0,  1,  2,  3,  4],
        [ 5,  6,  7,  8,  9],
        [10, 11, 12, 13, 14]],

       [[15, 16, 17, 18, 19],
        [20, 21, 22, 23, 24],
        [25, 26, 27, 28, 29]]])

# 異常 axis = 0
In [82]: arr[100,:,0:100]
---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
<ipython-input-82-21efcc74439d> in <module>()
----> 1 arr[100,:,0:100]

IndexError: index 100 is out of bounds for axis 0 with size 2

pandas Series 索引

In [84]: s = pd.Series(['You','are','a','nice','girl'])

In [85]: s
Out[85]:
0     You
1     are
2       a
3    nice
4    girl
dtype: object

# 按照索引選擇
In [86]: s[0]
Out[86]: 'You'

# []
In [87]: s[0:0]
Out[87]: Series([], dtype: object)

In [88]: s[0:-1]
Out[88]:
0     You
1     are
2       a
3    nice
dtype: object

# 易錯點,ix包含區間為 []
In [91]: s.ix[0:0]
Out[91]:
0    You
dtype: object

In [92]: s.ix[0:1]
Out[92]:
0    You
1    are
dtype: object

# ix索引不存在index
In [95]: s.ix[400]
KeyError: 400

# 按照從0開始的索引
In [95]: s.iloc[0]
Out[95]: 'You'

In [96]: s.iloc[1]
Out[96]: 'are'

In [97]: s.iloc[100]
IndexError: single positional indexer is out-of-bounds

In [98]: s = pd.Series(['You','are','a','nice','girl'], index=list('abcde'))

In [99]: s
Out[99]:
a     You
b     are
c       a
d    nice
e    girl
dtype: object

In [100]: s.iloc[0]
Out[100]: 'You'

In [101]: s.iloc[1]
Out[101]: 'are'

# 按照 label 索引
In [103]: s.loc['a']
Out[103]: 'You'

In [104]: s.loc['b']
Out[104]: 'are'

In [105]: s.loc[['b','a']]
Out[105]:
b    are
a    You
dtype: object

# loc切片索引
In [106]: s.loc['a':'c']
Out[106]:
a    You
b    are
c      a
dtype: object

In [108]: s.index
Out[108]: Index(['a', 'b', 'c', 'd', 'e'], dtype='object')

pandas DataFrame 索引

In [114]: import pandas as pd

In [115]: df = pd.DataFrame({'open':[1,2,3],'high':[4,5,6],'low':[6,3,1]}, index=pd.period_range('30/12/2017',periods=3,freq='H'))

In [116]: df
Out[116]:
                  high  low  open
2017-12-30 00:00     4    6     1
2017-12-30 01:00     5    3     2
2017-12-30 02:00     6    1     3

# 按列索引
In [117]: df['high']
Out[117]:
2017-12-30 00:00    4
2017-12-30 01:00    5
2017-12-30 02:00    6
Freq: H, Name: high, dtype: int64

In [118]: df.high
Out[118]:
2017-12-30 00:00    4
2017-12-30 01:00    5
2017-12-30 02:00    6
Freq: H, Name: high, dtype: int64

In [120]: df[['high','open']]
Out[120]:
                  high  open
2017-12-30 00:00     4     1
2017-12-30 01:00     5     2
2017-12-30 02:00     6     3

In [122]: df.ix[:]
D:CodeToolPythonPython36Scriptsipython:1: DeprecationWarning:
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

In [123]: df.iloc[0:0]
Out[123]:
Empty DataFrame
Columns: [high, low, open]
Index: []

In [124]: df.ix[0:0]
Out[124]:
Empty DataFrame
Columns: [high, low, open]
Index: []

# 按照 label 索引
In [127]: df.index
Out[127]: PeriodIndex(['2017-12-30 00:00', '2017-12-30 01:00', '2017-12-30 02:00'], dtype='period[H]', freq='H')

In [128]: df.loc['2017-12-30 00:00']
Out[128]:
high    4
low     6
open    1
Name: 2017-12-30 00:00, dtype: int64

# 檢查引數
In [155]: df.loc['2017-12-30 00:00:11']
Out[155]:
high    4
low     6
open    1
Name: 2017-12-30 00:00, dtype: int64

In [156]: df.loc['2017-12-30 00:00:66']
KeyError: 'the label [2017-12-30 00:00:66] is not in the [index]'

填坑

In [158]: df = pd.DataFrame({'a':[1,2,3],'b':[4,5,6]}, index=[2,3,4])

In [159]: df
Out[159]:
   a  b
2  1  4
3  2  5
4  3  6

# iloc 取第一行正確用法
In [160]: df.iloc[0]
Out[160]:
a    1
b    4
Name: 2, dtype: int64

# loc 正確用法
In [165]: df.loc[[2,3]]
Out[165]:
   a  b
2  1  4
3  2  5

# 注意此處 index 是什麼型別
In [167]: df.loc['2']
KeyError: 'the label [2] is not in the [index]'

# 索引 Int64Index
In [172]: df.index
Out[172]: Int64Index([2, 3, 4], dtype='int64')

# 索引為字串
In [168]: df = pd.DataFrame({'a':[1,2,3],'b':[4,5,6]}, index=list('234'))

In [169]: df
Out[169]:
   a  b
2  1  4
3  2  5
4  3  6

In [170]: df.index
Out[170]: Index(['2', '3', '4'], dtype='object')

# 此處沒有報錯，千萬注意 index 型別
In [176]: df.loc['2']
Out[176]:
a    1
b    4
Name: 2, dtype: int64

# ix 是一個功能強大的函數，但是爭議卻很大，往往是錯誤之源
# 咦，怎麼輸出與預想不一致！
In [177]: df.ix[2]
D:CodeToolPythonPython36Scriptsipython:1: DeprecationWarning:
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
Out[177]:
a    3
b    6
Name: 4, dtype: int64

# 注意開閉區間
In [180]: df.loc['2':'3']
Out[180]:
   a  b
2  1  4
3  2  5

總結

pandas 中的 ix 是錯誤之源，大型專案大量使用它時，往往造成不可預料的後果。pandas 自 0.20.x 版本起已將 ix 標記為棄用（deprecated）；它既可按 label 又可按位置索引的二義性，以及 [] 閉區間的切片行為，都違背了 “Explicit is better than implicit.” 原則。建議改用意義明確的 iloc 和 loc。

使用 label（例如字串）進行切片時是 [] 閉區間（包含兩端），其餘按位置切片時一般是 [) 左閉右開區間。

當在 numpy.ndarray、list、tuple、pandas.Series、pandas.DataFrame 之間混合使用，並以變數進行索引或切片來取值或賦值時，別太自信了，千萬小心錯誤，需要大量的測試。

我在工程中將 matlab 的矩陣與 python 的以上物件混合使用時，出現最多的問題就是 shape 不對應，以及 index、columns 錯誤。

最好不要混用不同資料結構，容易出錯，更增加轉化的效能開銷

到此這篇關於 python 基礎知識之索引與切片的文章就介紹到這了，更多相關 python 索引與切片內容請搜尋 it145.com 以前的文章或繼續瀏覽下面的相關文章，希望大家以後多多支援 it145.com！

python基礎知識之索引與切片詳解

目錄

基本索引

巢狀索引

切片

numpy.array 索引一維

numpy.array 索引二維

pandas Series 索引

pandas DataFrame 索引

填坑

總結

熱門文章

python基礎知識之索引與切片詳解

目錄

基本索引

巢狀索引

切片

numpy.array 索引 一維

numpy.array 索引 二維

pandas Series 索引

pandas DataFrame 索引

填坑

總結

熱門文章

numpy.array 索引一維

numpy.array 索引二維