第一章 数据结构和算法

1.1 将序列分解为单独的变量

# Unpack a tuple into individual variables.
p = (4, 5)
x, y = p
print(x)
print(y)

# Unpacking works with any sequence, including nested ones.
data = ['ACME', 50, 91.1, (2012, 12, 21)]
name, shares, price, date = data
print(name)
print(shares)
print(price)
print(date)
name, shares, price, (year, mon, day) = data
print(year)

p = (4, 5)
# x, y, z = p  # ValueError: the number of targets must match the number of elements

# Strings are iterable, so they unpack character by character.
s = 'hello!'
a, b, c, d, e, f = s
print(a)
print(f)

# A throwaway name such as `_` marks values that are not needed.
data = ['ACME', 50, 91.1, (2012, 12, 21)]
_, shares, price, _ = data
print(shares)
print(price)

1.2 从任意长度的可迭代对象中分解元素

from audioop import avg
def drop_first_last(grades):
    """Return the mean of ``grades`` with the first and last entries dropped.

    Raises ``ValueError`` (``statistics.StatisticsError`` is a subclass) when
    fewer than three grades are supplied, because nothing is left to average.
    """
    # Imported locally: ``audioop.avg`` averages raw audio sample bytes and
    # takes (fragment, width), so it cannot average a list of numbers.
    from statistics import mean

    first, *middle, last = grades
    return mean(middle)
# A starred target collects "all remaining" items into a list.
record = ('Dave', 'dave@example.com', '777-333-2323', '234-234-2345')
name, email, *phone_numbers = record
print(name)
print(email)
print(phone_numbers)

# The starred target may also come first.
*trailing, current = [10, 8, 7, 2, 5]
print(trailing)  # [10, 8, 7, 2]
print(current)  # 5
# Tagged tuples of varying length.
records = [
    ('foo', 1, 2),
    ('bar', 'hello'),
    ('foo', 5, 3),
]


def do_foo(x, y):
    """Print the two payload values of a 'foo' record."""
    print('foo', x, y)


def do_bar(s):
    """Print the single payload value of a 'bar' record."""
    print('bar', s)


# Star-unpacking in the loop header splits each record into tag + payload,
# and the payload is forwarded to the handler that matches the tag.
for tag, *args in records:
    if tag == 'foo':
        do_foo(*args)
    elif tag == 'bar':
        do_bar(*args)
# Star-unpacking combined with string splitting.
line = 'asdf:fedfr234://wef:678d:asdf'
uname, *fields, homedir, sh = line.split(':')
print(uname)
print(homedir)

# `*_` discards several values at once, also inside a nested pattern.
record = ('ACME', 50, 123.45, (12, 18, 2012))
name, *_, (*_, year) = record
print(name)
print(year)

# Split a list into its head and tail.
items = [1, 10, 7, 4, 5, 9]
head, *tail = items
print(head)  # 1
print(tail)  # [10, 7, 4, 5, 9]
def sum(items):
    """Return the total of ``items`` combined with ``+``.

    NOTE: this shadows the built-in ``sum`` for the rest of the module.

    Items are combined right-to-left, i.e. ``a + (b + (c + d))``, which is
    the order the head/tail recursion used. An empty iterable yields ``0``
    instead of raising, and the loop avoids the recursion limit (and the
    repeated tail copies) that long inputs would hit.
    """
    values = list(items)
    if not values:
        return 0
    total = values.pop()
    for value in reversed(values):
        total = value + total
    return total

1.3 保存最后N个元素

from _collections import deque
def search(lines, pattern, history=5):
    """Yield ``(line, previous_lines)`` for every line containing ``pattern``.

    ``previous_lines`` is a ``deque`` holding at most ``history`` lines seen
    before the match. The same deque object is yielded each time and keeps
    changing as iteration continues, so copy it if it must be kept.
    """
    previous_lines = deque(maxlen=history)
    for line in lines:
        if pattern in line:
            yield line, previous_lines
        # Record every line after the check so the window holds only
        # earlier lines.
        previous_lines.append(line)
# Example use on a file
if __name__ == '__main__':
    with open('somefile.txt') as f:
        for line, prevlines in search(f, 'python', 5):
            # Lines read from a file keep their trailing newline, so
            # suppress print's own newline to avoid blank lines in between.
            for pline in prevlines:
                print(pline, end='')
            print(line, end='')
            print('-' * 20)
# A bounded deque silently discards the oldest item once it is full.
q = deque(maxlen=3)
q.append(1)
q.append(2)
q.append(3)
print(q)  # deque([1, 2, 3], maxlen=3)
q.append(4)
print(q)  # deque([2, 3, 4], maxlen=3)

# An unbounded deque supports appends and pops at both ends.
q = deque()
q.append(1)
q.append(2)
q.append(3)
print(q)
q.appendleft(4)
print(q)
q_pop = q.pop()
print(q_pop)
print(q)
q_popleft = q.popleft()
print(q_popleft)
print(q)

1.4 找到最大或最小的N个元素

import heapq
nums = [1, 30, 6, 2, 36, 33, 46, 3, 23, 43]
print(heapq.nlargest(3, nums))
print(heapq.nsmallest(3, nums))

# Both functions accept a key function for more complex records.
portfolio = [
    {'name': 'IBM', 'shares': 100, 'price': 2.4},
    {'name': 'A', 'shares': 1040, 'price': 12.4},
    {'name': 'S', 'shares': 40, 'price': 23.4},
    {'name': 'D', 'shares': 1, 'price': 2.49},
    {'name': 'F', 'shares': 9, 'price': 24},
]
cheap = heapq.nsmallest(3, portfolio, key=lambda s: s['price'])
expensive = heapq.nlargest(3, portfolio, key=lambda s: s['price'])
print(cheap)
print(expensive)
nums = [1, 8, 2, 23, 7, -4, 18, 23, 42, 37, 2]
heap = list(nums)
print(heap)
# heappop only returns the smallest item if the list satisfies the heap
# invariant, so the copy has to be heapified first.
heapq.heapify(heap)
print(heap)
print(heapq.heappop(heap))  # -4
print(heapq.heappop(heap))  # 1
print(heapq.heappop(heap))  # 2

1.5 实现优先级队列

import heapq
class PriorityQueue:
    """Queue that always pops the item with the highest priority.

    Items pushed with equal priority come back in insertion order.
    """

    def __init__(self):
        self._index = 0
        self._queue = []

    def push(self, item, priority):
        """Add ``item`` with the given numeric ``priority``."""
        # The priority is negated because heapq is a min-heap; the running
        # index breaks ties so the items themselves are never compared.
        entry = (-priority, self._index, item)
        heapq.heappush(self._queue, entry)
        self._index += 1

    def pop(self):
        """Remove and return the highest-priority item."""
        _priority, _order, item = heapq.heappop(self._queue)
        return item
class Item:
    """Minimal named object with a readable repr and no ordering methods."""

    def __init__(self, name):
        self.name = name

    def __repr__(self):
        template = 'Item({!r})'
        return template.format(self.name)
q = PriorityQueue()
q.push(Item('foo'), 1)
q.push(Item('spam'), 4)
q.push(Item('bar'), 5)
q.push(Item('grok'), 1)
print(q.pop())
print(q.pop())
print(q.pop())

# Item instances cannot be ordered on their own.
a = Item('foo')
b = Item('bar')
# a < b  # TypeError: Item defines no ordering

# (priority, item) tuples compare fine while the priorities differ ...
a = (1, Item('foo'))
b = (5, Item('bar'))
print(a < b)
c = (1, Item('grok'))
# a < c  # TypeError: equal priorities fall through to comparing the Items

# ... and a unique index in the middle means the Items are never compared.
a = (1, 0, Item('foo'))
b = (5, 1, Item('bar'))
c = (1, 2, Item('grok'))
print(a < b)
print(a < c)

1.6 在字典中将键映射到多个值上

# A multi-valued mapping stores a container per key: a list keeps insertion
# order, a set drops duplicates.
d = {
    'a': [1, 2, 3],
    'b': [4, 5],
}
e = {
    'a': {1, 2, 3},
    'b': {4, 5},
}

from collections import defaultdict

# defaultdict creates the empty container on first access to a key.
d = defaultdict(list)
d['a'].append(1)
d['a'].append(2)
d['b'].append(4)
print(d)
d = defaultdict(set)
d['a'].add(1)
d['a'].add(2)
d['b'].add(4)
print(d)

# The same effect on a plain dict through setdefault().
d = {}
d.setdefault('a', []).append(1)
d.setdefault('a', []).append(2)
d.setdefault('b', []).append(3)
print(d)

# Sample (key, value) input for the two grouping loops below.
pairs = [('a', 1), ('a', 2), ('b', 3)]

# Manual initialisation of the per-key list ...
d = {}
for key, value in pairs:
    if key not in d:
        d[key] = []
    d[key].append(value)

# ... versus letting defaultdict do it.
d = defaultdict(list)
for key, value in pairs:
    d[key].append(value)
1.7 让字典保持有序

from collections import OrderedDict  
# Fill an OrderedDict one key at a time, then walk it in insertion order.
d = OrderedDict()
for key, value in (('foo', 1), ('bar', 2), ('spam', 3), ('grol', 4)):
    d[key] = value
for key, value in d.items():
    print(key, value)
import json  

1.8 与字典有关的计算问题

# Sample stock prices keyed by ticker symbol.
price = {
    'ACME': 45.23,
    'AAPL': 612.78,
    'IBM': 205.55,
    'HPQ': 37.20,
    'FB': 10.75,
}

# zip() inverts the mapping into (value, key) pairs so min/max/sorted
# compare by value and still report the matching key.
min_price = min(zip(price.values(), price.keys()))
print(min_price)
max_price = max(zip(price.values(), price.keys()))
print(max_price)
price_sorted = sorted(zip(price.values(), price.keys()))
print(price_sorted)

price_and_names = zip(price.values(), price.keys())
print(min(price_and_names))
# print(max(price_and_names))  # ValueError: zip() returns an iterator that can be consumed only once

# Reductions on the dict itself only look at the keys.
print(min(price))
print(max(price))
# Reducing the values loses the key ...
print(min(price.values()))
print(max(price.values()))
# ... and a key function returns the key, so the value needs a second lookup.
print(min(price, key=lambda k: price[k]))
print(max(price, key=lambda k: price[k]))
min_value = price[min(price, key=lambda k: price[k])]
print(min_value)

# With equal values the key decides the outcome of the comparison.
price = {
    'AAA': 23,
    'ZZZ': 23,
}
print(min(zip(price.values(), price.keys())))  # (23, 'AAA')
print(max(zip(price.values(), price.keys())))  # (23, 'ZZZ')

1.9 在两个字典中寻找相同点

a = {'x': 1, 'y': 2, 'z': 3}
b = {'w': 10, 'x': 11, 'y': 2}

# Key and item views support set operations directly.
print(a.keys() & b.keys())  # {'x', 'y'}
print(a.keys() - b.keys())  # {'z'}
print(a.items() & b.items())  # {('y', 2)}

# Build a new dict with selected keys removed.
c = {key: a[key] for key in a.keys() - {'z', 'w'}}
print(c)  # {'x': 1, 'y': 2}

1.10 从序列中移除重复项且保持元素间顺序不变

def dedupe(items):
    """Yield the hashable ``items`` in order, skipping values already seen."""
    seen = set()
    for item in items:
        if item not in seen:
            yield item
            # Remember the value so later duplicates are skipped.
            seen.add(item)
a = [1, 5, 2, 1, 9, 1, 5, 10]
print(list(dedupe(a)))
def dedupe2(items, key=None):
    """Yield ``items`` in order, skipping those whose key was already seen.

    ``key`` maps an item to a hashable value used for the duplicate check;
    when it is ``None`` the item itself is used and must be hashable.
    """
    seen = set()
    for item in items:
        val = item if key is None else key(item)
        if val not in seen:
            yield item
            # Remember the derived value so later duplicates are skipped.
            seen.add(val)
# Dicts are unhashable, so a key function supplies a hashable stand-in.
a = [
    {'x': 1, 'y': 2},
    {'x': 1, 'y': 3},
    {'x': 1, 'y': 2},
    {'x': 2, 'y': 4},
]
print(list(dedupe2(a, key=lambda d: (d['x'], d['y']))))
print(list(dedupe2(a, key=lambda d: d['x'])))

# A set also removes duplicates but does not preserve the original order.
a = [1, 5, 2, 1, 9, 1, 5, 10]
print(set(a))

1.11 对切片命名

items = [0, 1, 2, 3, 4, 5, 6]
# A slice object can be stored under a name and used wherever [start:stop]
# is allowed.
a = slice(2, 4)
print(items[2:4])
print(items[a])
items[a] = [10, 11]
print(items)
print(a.start)
print(a.stop)
print(a.step)

1.12 找出序列中出现次数最多的元素

words = [
    'look', 'into', 'my', 'eyes', 'look', 'into', 'my', 'eyes',
    'the', 'look',
]
from collections import Counter

word_counts = Counter(words)
top_three = word_counts.most_common(3)
print(top_three)
print(word_counts['look'])
print(word_counts['the'])

# Counts can be incremented by hand ...
morewords = ['why', 'are', 'you', 'not', 'looking', 'in', 'my', 'eyes']
for word in morewords:
    word_counts[word] += 1
print(word_counts['eyes'])
print(word_counts['why'])
# ... or in bulk with update(), which adds the counts a second time here.
word_counts.update(morewords)
print(word_counts['eyes'])
print(word_counts['why'])

# Counters support arithmetic; subtraction drops non-positive counts.
a = Counter(words)
b = Counter(morewords)
print(a)
print(b)
c = a + b
print(c)
d = a - b
print(d)

1.13 通过公共键对字典列表排序

rows = [
    {'fname': 'Brian', 'lname': 'Jones', 'uid': 1003},
    {'fname': 'David', 'lname': 'Beazley', 'uid': 1002},
    {'fname': 'John', 'lname': 'Cleese', 'uid': 1001},
    {'fname': 'Big', 'lname': 'Jones', 'uid': 1004},
]
from operator import itemgetter

rows_by_fname = sorted(rows, key=itemgetter('fname'))
rows_by_uid = sorted(rows, key=itemgetter('uid'))
print(rows_by_fname)
print(rows_by_uid)

# itemgetter accepts several keys and then sorts by the resulting tuple.
rows_by_lfname = sorted(rows, key=itemgetter('lname', 'fname'))
print(rows_by_lfname)

# The same orderings with lambdas; the tuple order must match
# itemgetter('lname', 'fname') to give the same result.
rows_by_fname = sorted(rows, key=lambda r: r['fname'])
rows_by_lfname = sorted(rows, key=lambda r: (r['lname'], r['fname']))
print(rows_by_fname)
print(rows_by_lfname)

print(min(rows, key=itemgetter('uid')))
print(max(rows, key=itemgetter('uid')))

1.14 对不原生支持比较操作的对象排序

class User:
    """Record identified by ``user_id``; defines no comparison methods."""

    def __init__(self, user_id):
        self.user_id = user_id

    def __repr__(self):
        template = 'User({})'
        return template.format(self.user_id)
users = [User(23), User(3), User(99)]
print(users)
print(sorted(users, key=lambda u: u.user_id))

from operator import attrgetter

# attrgetter does the same job as the lambda above.
print(sorted(users, key=attrgetter('user_id')))
print(min(users, key=attrgetter('user_id')))
print(max(users, key=attrgetter('user_id')))

1.15 根据字段将记录分组

rows = [
    {'address': '5412 N CLARK', 'data': '07/01/2012'},
    {'address': '5232 N CLARK', 'data': '07/04/2012'},
    {'address': '5542 E 58ARK', 'data': '07/02/2012'},
    {'address': '5152 N CLARK', 'data': '07/03/2012'},
    {'address': '7412 N CLARK', 'data': '07/02/2012'},
    {'address': '6789 w CLARK', 'data': '07/03/2012'},
    {'address': '9008 N CLARK', 'data': '07/01/2012'},
    {'address': '2227 W CLARK', 'data': '07/04/2012'},
]
from operator import itemgetter
from itertools import groupby

# groupby only merges consecutive items, so sort by the grouping field first.
rows.sort(key=itemgetter('data'))
for data, items in groupby(rows, key=itemgetter('data')):
    print(data)
    for i in items:
        print(' ', i)

from collections import defaultdict

# Alternative that needs no sorting: index the rows by their field value.
rows_by_date = defaultdict(list)
for row in rows:
    rows_by_date[row['data']].append(row)
for r in rows_by_date['07/04/2012']:
    print(r)

1.16 筛选序列中的元素

mylist = [1, 4, -5, 10, -7, 2, 3, -1]
print([n for n in mylist if n > 0])  # list comprehension
print([n for n in mylist if n < 0])
pos = (n for n in mylist if n > 0)  # generator expression
print(pos)
for x in pos:
    print(x)
values = ['1', '2', '-3', '-', '4', 'N/A', '5']
def is_int(val):
    """Return True if ``int(val)`` succeeds, False if it raises ValueError."""
    try:
        int(val)
    except ValueError:
        return False
    return True
# filter() covers criteria that do not fit into a comprehension condition.
ivals = list(filter(is_int, values))
print(ivals)

mylist = [1, 4, -5, 10, -7, 2, 3, -1]
import math

# A comprehension can transform and filter at the same time.
print([math.sqrt(n) for n in mylist if n > 0])
# Replace rejected values instead of dropping them.
clip_neg = [n if n > 0 else 0 for n in mylist]
print(clip_neg)
clip_pos = [n if n < 0 else 0 for n in mylist]
print(clip_pos)

addresses = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']
counts = [0, 3, 10, 4, 1, 7, 6, 1]
from itertools import compress

# compress() keeps the items whose matching selector is true.
more5 = [n > 5 for n in counts]
print(more5)
print(list(compress(addresses, more5)))

1.17 从字典中提取子集

prices = {'ACNE': 45.23, 'AAPL': 612.78, 'IBM': 205.55, 'HPQ': 37.20, 'FB': 10.75}

# Dict comprehension filtered by value.
p1 = {key: value for key, value in prices.items() if value > 200}
print(p1)

# Dict comprehension filtered by key.
tech_names = {'AAPL', 'IBM', 'HPQ'}
p2 = {key: value for key, value in prices.items() if key in tech_names}
print(p2)

# Equivalent, but slower: dict() over a sequence of tuples.
p3 = dict((key, value) for key, value in prices.items() if value > 200)
print(p3)

# Equivalent, but slower: a second lookup per selected key.
tech_names = {'AAPL', 'IBM', 'HPQ'}
p4 = {key: prices[key] for key in prices.keys() if key in tech_names}
print(p4)

1.18 将名称映射到序列的元素中

from collections import namedtuple  
Subscriber = namedtuple('Subscriber', ['addr', 'joined'])
sub = Subscriber('wang@qq.com', '2020-10-10')
print(sub)
print(sub.joined)
print(sub.addr)

# A namedtuple still behaves like a regular tuple.
print(len(sub))
addr, joined = sub
print(addr)
print(joined)
def compute_cost(records):
    """Return the float total of ``rec[1] * rec[2]`` over all records."""
    running = 0.0
    for rec in records:
        quantity, unit_price = rec[1], rec[2]
        running = running + quantity * unit_price
    return running
Stock = namedtuple('Stock', ['name', 'shares', 'price'])


def compute_cost2(records):
    """Return the float total of shares * price, reading fields by name."""
    running = 0.0
    for fields in records:
        stock = Stock(*fields)
        running = running + stock.shares * stock.price
    return running
s = Stock('ACME', 100, 123.45)
print(s)
# s.shares = 75  # AttributeError: namedtuple fields are read-only
# _replace() returns a new tuple with the given fields changed.
s = s._replace(shares=75)
print(s)
Stock = namedtuple('Stock', ['name', 'shares', 'price', 'date', 'time'])
# Prototype instance carrying the default value of every field.
stock_prototype = Stock('', 0, 0.0, None, None)


def dict_to_stock(s):
    """Return a ``Stock`` built from mapping ``s``; absent fields keep the prototype defaults."""
    return stock_prototype._replace(**s)


a = {'name': 'ACME', 'shares': 100, 'price': 123.45}
print(dict_to_stock(a))
b = {'name': 'ACME', 'shares': 100, 'price': 123.45, 'date': '12/12/2012'}
print(dict_to_stock(b))

1.19 同时对数据做转换和换算

nums = [1, 2, 3, 4, 5]
# A generator expression can be passed as the only argument without extra
# parentheses.
s = sum(x * x for x in nums)
print(s)
import os

# Check whether the directory contains any .py file.
files = os.listdir('dirname')
if any(name.endswith('.py') for name in files):
    print('There be Python!')
else:
    print('sorry, no Python!')
# Convert every field to text and join, in one step.
s = ('ACME', 50, 123.45)
print(','.join(str(x) for x in s))

portfolio = [
    {'name': 'GOOG', 'shares': 50},
    {'name': 'YHOO', 'shares': 75},
    {'name': 'AOL', 'shares': 20},
    {'name': 'SCOX', 'shares': 65},
]
# Reducing over a generator returns the smallest value ...
min_shares = min(s['shares'] for s in portfolio)
print(min_shares)
# ... while a key function returns the whole record that holds it.
min_shares = min(portfolio, key=lambda s: s['shares'])
print(min_shares)
1.20 将多个映射合并为单个映射
a = {'x': 1, 'z': 3}
b = {'y': 2, 'z': 4}
# ChainMap is part of the standard library; there is no need to reach into
# pip's vendored packages for it.
from collections import ChainMap

c = ChainMap(a, b)
print(c['z'])  # 3: the value from the first mapping (a) wins
print(len(c))
print(list(c.values()))

# Mutations always act on the first mapping in the chain.
c['z'] = 10
c['w'] = 40
del c['x']
print(a)
# del c['y']  # KeyError: 'y' lives in b, and only the first mapping is modified

# new_child() pushes a fresh mapping; .parents drops the front one again.
values = ChainMap()
values['x'] = 1
values = values.new_child()  # add a new map
values['x'] = 2
values = values.new_child()
values['x'] = 3
# print(values)
print(values['x'])  # 3
values = values.parents
print(values['x'])  # 2
values = values.parents
print(values['x'])  # 1

# Merging with update() copies the data into a separate dict ...
a = {'x': 1, 'z': 3}
b = {'y': 2, 'z': 4}
merged = dict(b)
merged.update(a)
print(merged['x'])
print(merged['y'])
print(merged['z'])
a['x'] = 13
print(merged['x'])  # still 1: later changes are not reflected in the copy

# ... whereas a ChainMap keeps referring to the original dicts.
a = {'x': 1, 'z': 3}
b = {'y': 2, 'z': 4}
merged = ChainMap(a, b)
print(merged['x'])
a['x'] = 42
print(merged['x'])  # 42: the change is visible through the ChainMap
