-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsuper_fish3_test.py
More file actions
148 lines (127 loc) · 5.72 KB
/
super_fish3_test.py
File metadata and controls
148 lines (127 loc) · 5.72 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
#-*- coding:utf8 -*-
"""
线程池 + requests 实现 “异步”!
参考资料: https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor-example (线程池)
参考资料: https://stackoverflow.com/questions/14245989/python-requests-non-blocking
"""
import requests
import concurrent.futures
URLS = ["https://github.com/chuanwang66/silly_player_x",
"https://github.com/chuanwang66/silly_browse",
"https://github.com/chuanwang66/easerpc_x",
"https://github.com/chuanwang66/kcp",
"https://github.com/chuanwang66/test_opencv",
"https://github.com/chuanwang66/C-Thread-Pool",
"https://github.com/chuanwang66/ngx_stream_upstream_check_module",
"https://github.com/chuanwang66/AsyncNet",
"https://github.com/chuanwang66/Spider",
"https://github.com/chuanwang66/AudioStreaming",
"https://github3.com/chuanwang66/fake"]
def requests_get_url(url, cookies=None, max_retries=3, verbose=False):
    """Fetch *url* with ``requests.get``, retrying on request failures.

    Args:
        url: the URL to fetch.
        cookies: optional dict of cookies to send. Defaults to None (treated
            as an empty dict); the original ``cookies={}`` default was a
            mutable default argument shared across all calls.
        max_retries: number of additional attempts after the first failure.
        verbose: when True, print a short diagnostic per failure/retry.

    Returns:
        The ``requests.Response`` on success, or None if every attempt failed.
    """
    if cookies is None:
        cookies = {}
    r = None
    num_retries = 0
    while num_retries <= max_retries:
        try:
            r = requests.get(url, cookies=cookies, timeout=10.0)
            break
        # The specific exception classes below are all subclasses of
        # RequestException; the final clause is the catch-all. Each failure
        # consumes one retry.
        except requests.exceptions.ConnectTimeout:
            if verbose: print('connect timeout')
            num_retries += 1
        except requests.exceptions.ConnectionError:
            if verbose: print('connect error')
            num_retries += 1
        except requests.exceptions.Timeout:
            if verbose: print('timeout')
            num_retries += 1
        except requests.exceptions.TooManyRedirects:
            if verbose: print('too many redirects')
            num_retries += 1
        except requests.exceptions.RequestException as e:
            if verbose: print(e)
            num_retries += 1
        if verbose: print('retry: %d'%(num_retries))
    return r
def test1():
    """Fetch every URL in URLS on a thread pool; print each page size as it
    completes (via ``concurrent.futures.as_completed``)."""
    # The with statement ensures worker threads are cleaned up promptly.
    with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
        future_to_url = {executor.submit(requests_get_url, url): url for url in URLS}
        # as_completed yields each future as it finishes, in completion order
        # (duplicated futures are yielded once).
        for future in concurrent.futures.as_completed(future_to_url):
            # Resolve the URL *before* calling result(): the original bound
            # `url` only after a successful result(), so the except clause
            # raised NameError whenever the very first future failed.
            url = future_to_url[future]
            try:
                r = future.result()
                if r:
                    print('%r page is %d bytes' % (url, len(r.content)))
                else:
                    print('%r page is null'%(url))
            except Exception as exc:
                print('%r generated an exception: %s' % (url, exc))
def requests_get_url2(url, cookies=None, max_retries=3, verbose=False):
    """Fetch *url* with ``requests.get``, retrying on request failures.

    Variant of requests_get_url whose return value carries the url, so a
    done-callback (on_response) can report which request it belongs to.

    Args:
        url: the URL to fetch.
        cookies: optional dict of cookies to send. Defaults to None (treated
            as an empty dict); the original ``cookies={}`` default was a
            mutable default argument shared across all calls.
        max_retries: number of additional attempts after the first failure.
        verbose: when True, print a short diagnostic per failure/retry.

    Returns:
        A ``(url, response_or_None)`` tuple; the response is None if every
        attempt failed.
    """
    if cookies is None:
        cookies = {}
    r = None
    num_retries = 0
    while num_retries <= max_retries:
        try:
            r = requests.get(url, cookies=cookies, timeout=10.0)
            break
        # Specific clauses keep the per-failure messages; RequestException
        # is the catch-all parent. Each failure consumes one retry.
        except requests.exceptions.ConnectTimeout:
            if verbose: print('connect timeout')
            num_retries += 1
        except requests.exceptions.ConnectionError:
            if verbose: print('connect error')
            num_retries += 1
        except requests.exceptions.Timeout:
            if verbose: print('timeout')
            num_retries += 1
        except requests.exceptions.TooManyRedirects:
            if verbose: print('too many redirects')
            num_retries += 1
        except requests.exceptions.RequestException as e:
            if verbose: print(e)
            num_retries += 1
        if verbose: print('retry: %d'%(num_retries))
    return url, r
def on_response(future):
    """Done-callback for futures produced by requests_get_url2.

    Expects ``future.result()`` to be a ``(url, response_or_None)`` pair and
    prints the page size (or an error line). Never raises.
    """
    # result() can raise before url is unpacked; the original then hit a
    # NameError on `url` inside the except clause. Pre-bind a placeholder.
    url = '<unknown>'
    try:
        url, r = future.result()
        if r:
            print('%r page is %d bytes' % (url, len(r.content)))
        else:
            print('%r page is null'%(url))
    except Exception as exc:
        print('%r generated an exception: %s' % (url, exc))
def test2():
    """Fetch every URL in URLS on a thread pool, reporting each result
    through the on_response done-callback instead of polling."""
    # The with statement ensures worker threads are cleaned up promptly;
    # note that leaving the block still waits for all submitted work.
    with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
        future_to_url = {executor.submit(requests_get_url2, u): u for u in URLS}
        futures = set(future_to_url)
        # add_done_callback invokes on_response as soon as the future
        # completes (immediately, if it has already finished) — no explicit
        # wait loop is needed here.
        for fut in futures:
            fut.add_done_callback(on_response)
if __name__ == "__main__":
    # Run on multiple threads; concurrent.futures.as_completed collects the
    # results one by one as they finish.
    test1()
    # Alternative: run on multiple threads and register an async callback per
    # future via future.add_done_callback(on_response) — no waiting loop at all.
    #test2()