MyInvestmentModules/Python/MarketCap.py at master · kimpro82/MyInvestmentModules · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
import requests
from bs4 import BeautifulSoup


# 0. Read byte stream from url

url = 'https://finance.naver.com/sise/sise_market_sum.naver'
params = {
    'sosok': '0',                                               # 0 : KOSPI, 1 : KOSDAQ
    'page': '1',
}

# A timeout keeps the script from hanging forever on a stalled connection, and
# raise_for_status() stops right here on an HTTP error instead of failing later
# with an unrelated AttributeError while parsing an error page.
response = requests.get(url, params=params, timeout=10)
response.raise_for_status()
# response         : <class 'requests.models.Response'>, prints as <Response [200]>
# response.content : <class 'bytes'>, the still-encoded body (b'\n\n\n\n\n\n\n<!-- ...)

# The raw bytes (not response.text) are handed to the parser.
soup = BeautifulSoup(response.content, "html.parser")
# soup             : <class 'bs4.BeautifulSoup'>


# 1.0 Read one stock's data

# Take only the first stock row of the table and list every newline-separated
# text fragment next to its index, to see at which position each value sits.
first_row = soup.find('tbody').find_all('tr', onmouseover="mouseOver(this)")[0]
fragments = first_row.get_text().split("\n")
for position, fragment in enumerate(fragments):
    print(position, fragment)

'''
0
1 1
2 삼성전자
3 79,400
……
'''


# 1.1 Read one page's data

section = soup.find('tbody')
items = section.find_all('tr', onmouseover="mouseOver(this)")

# Display width reserved for each printed column (same for every row)
widths = [4, 20, 9, 9]

for item in items:
    sinfo = item.get_text().split("\n")

    # print(sinfo[1] + '\t' + sinfo[2] + '\t' + sinfo[3] + '\t' + sinfo[15])

    # Positions of the four printed fragments (per the sample output below:
    # rank, name, price and the last figure of the row).
    # Named `columns` rather than `list` so the builtin is not shadowed.
    if sinfo[5] != '0':                                         # data locations are moved when the price change is 0
        columns = [1, 2, 3, 15]
    else:
        columns = [1, 2, 3, 11]

    for column, width in zip(columns, widths):
        text = sinfo[column]

        # A Korean letter takes 2 terminal cells, anything else 1
        used = sum(2 if char >= '가' else 1 for char in text)

        # Left-align: pad on the right (a negative count pads with nothing)
        print(text + (width - used) * ' ', end=' ')

    print()

'''
1    삼성전자             79,400    4,740,007
2    SK하이닉스           127,000   924,563
3    NAVER                378,500   621,737
4    삼성바이오로직스     901,000   596,147
5    삼성전자우           71,700    590,010
……
'''


# 1.2 Read plural pages' data

pages = 2                                                       # input the last page's number

# Display width reserved for each printed column (same for every row).
# 22 = display width of "한국타이어앤테크놀로지" (11 Korean letters x 2 cells)
widths = [4, 22, 9, 9]

for page in range(1, pages + 1):

    # Read data in each page
    params['page'] = page                                       # change the page number in the url
    # Timeout: never hang on a stalled connection.
    # raise_for_status(): fail here on an HTTP error, not later while parsing.
    response = requests.get(url, params=params, timeout=10)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, "html.parser")
    section = soup.find('tbody')
    items = section.find_all('tr', onmouseover="mouseOver(this)")

    # Get specific elements
    for item in items:
        sinfo = item.get_text().split("\n")

        # Set the required elements' location.
        # Named `columns` rather than `list` so the builtin is not shadowed.
        if sinfo[5] != '0':                                     # data locations are moved when the price change is 0
            columns = [1, 2, 3, 15]
        else:
            columns = [1, 2, 3, 11]

        # Print data
        for column, width in zip(columns, widths):
            text = sinfo[column]

            # Find the exact display length of the text:
            # a Korean letter takes 2 terminal cells, anything else 1
            used = sum(2 if char >= '가' else 1 for char in text)
            padding = (width - used) * ' '                      # negative count -> no padding

            # Determine left or right alignment from the first character:
            # digits and punctuation sort below 'A' and are right-aligned,
            # letters are left-aligned. text[:1] (not text[0]) so that an empty
            # fragment is padded instead of raising IndexError.
            # NOTE: a name that starts with a digit is right-aligned as well.
            if text[:1] < 'A':
                print(padding + text, end=' ')
            else:
                print(text + padding, end=' ')

        print()

'''
   1 삼성전자                  79,400 4,740,007
   2 SK하이닉스               127,000   924,563
   3 NAVER                    378,500   621,737
……
  73 한국타이어앤테크놀로지    39,850    49,364
……
 100 한솔케미칼               293,500    33,269
'''