Skip to content

Commit 31759c1

Browse files
committed
By mistake previous commit didn't include files :(
1 parent 534b704 commit 31759c1

File tree

4 files changed

+171
-0
lines changed

4 files changed

+171
-0
lines changed

backup.py

Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
#!/usr/bin/env python
2+
# -*- coding: utf-8 -*-
3+
4+
import mechanize
5+
import cookielib
6+
import re
7+
import sys
8+
import getopt
9+
from config import *
10+
from globals import *
11+
from BeautifulSoup import *
12+
from urllib2 import HTTPError
13+
14+
def setupBrowser():
    """Create the mechanize browser used to talk to the forum.

    The browser is returned with redirect handling and robots.txt handling
    both switched off, and with globals.USERAGENT as its only explicit header.
    """
    agent = mechanize.Browser()

    # Switch off mechanize's redirect and robots.txt handling
    agent.set_handle_redirect(False)
    agent.set_handle_robots(False)

    # Identify ourselves with the configured User-agent string
    headers = [('User-agent', globals.USERAGENT)]
    agent.addheaders = headers

    return agent
24+
25+
def setupCookies(browser):
    """Attach a new, empty LWPCookieJar to the given browser."""
    browser.set_cookiejar(cookielib.LWPCookieJar())
29+
30+
def Login(browser, username, passwd):
    """Log in to the forum through its login form.

    Opens globals.URL, selects the form named 'form_login', fills in the
    username, password and autologin fields and submits it. The forum is
    expected to answer the POST with a 302 redirect, which surfaces here as
    an HTTPError carrying code 302 and is taken as a successful login.

    Returns True when that 302 was received and False when the requests
    completed without raising. Any HTTPError with a different code is
    propagated unchanged to the caller.
    """
    try:
        # Open forum page
        browser.open(globals.URL)

        # Once opened the page, look for the login form and fill it
        browser.select_form('form_login')
        browser['username'] = username
        browser['password'] = passwd
        browser['autologin'] = ['on']

        # And submit. We expect a 302 redirection status after POSTing the
        # form for login...
        browser.submit()
    except HTTPError as e:
        if e.code != 302:
            # Bare raise keeps the original error (url, code, traceback).
            # Raising the HTTPError class itself would fail, because its
            # constructor requires arguments.
            raise
        return True

    return False
54+
55+
56+
# Return list of sections in a list of tuples, including calculated index...
57+
def getSections(html, debug = False):
58+
# Feed beautifulSoup with the read html
59+
# Read sectionsl
60+
soup = BeautifulSoup(html)
61+
sectionNum = 0
62+
sectionList = []
63+
for link in soup.findAll('a', attrs={'class':'forumtitle'}):
64+
sectionNum += 1
65+
sectionList.append((sectionNum, link.text, link['href']))
66+
67+
if (debug):
68+
print "Número de secciones encontradas: ", sectionNum
69+
print "==============================================="
70+
print "SQL:"
71+
for idx, name, url in sectionList:
72+
print idx, '.- INSERT INTO sections(NAME, URL) VALUES("%(name)s", "%(url)s");' % \
73+
{'name': name, 'url': url}
74+
75+
return sectionList
76+
77+
78+
# Get the TID; it will be used later on for admin purposes
79+
def getAdminTid(html):
    """Extract the administration TID from the given page.

    Looks for anchors whose href begins with
    '/admin/index.forum?part=admin&tid=' and takes whatever follows the last
    '=' of that href as the TID. If several anchors match, the last one
    wins.

    Returns the TID as found in the href, or an empty string when no anchor
    matches.
    """
    # Raw string with the dot escaped, so that only a literal 'index.forum'
    # is accepted rather than any character between 'index' and 'forum'.
    adminref = re.compile(r'^/admin/index\.forum\?part=admin&tid=')
    soup = BeautifulSoup(html)

    tid = ''
    for link in soup.findAll('a', attrs={'href': adminref}):
        # TID value is everything after the last '=' of the href
        tid = link['href'].rpartition('=')[2]

    return tid
91+
92+
93+
def backup(section):
    """Log in to the forum configured under `section` and read its main page.

    Loads the credentials for `section`, prepares a browser with cookie
    support, logs in, fetches globals.URL again and extracts the section
    list and the administration TID from that page.

    Raises RuntimeError when the login is not confirmed and AssertionError
    when no administration TID can be found in the page. Errors raised by
    the helpers it calls are propagated.
    """
    # Setup credentials from config file
    getCredentials(section)

    # Obtain a browser instance finely setup
    browser = setupBrowser()

    # Prepare cookies handling mechanism
    setupCookies(browser)

    # Login with the configured user and stop right here if it failed;
    # carrying on would only scrape the anonymous version of the page.
    if not Login(browser, globals.USERNAME, globals.PASSWD):
        raise RuntimeError('Login failed for section %s' % section)

    # Login was fine, move back to main URL and get html
    response = browser.open(globals.URL)
    html = response.read()

    # Get main sections from given html (not used further yet)
    sections = getSections(html)

    # Get administration TID (and assert value is valid!)
    tid = getAdminTid(html)
    assert tid != '', 'administration TID not found'
117+
118+
119+
# Get list of messages from a given section
120+
#def getMessagesList(browser, section
121+
122+
if __name__ == '__main__':
    # The section name is the first command line argument; without it there
    # is nothing to back up, so report the problem and exit with status 2.
    if len(sys.argv) >= 2:
        backup(sys.argv[1])
    else:
        print('Must give a section name from your credentials file')
        sys.exit(2)

config.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
#!/usr/bin/env python
2+
# -*- coding: utf-8 -*-
3+
4+
from ConfigParser import *
5+
from globals import globals
6+
7+
# Module-level alias for the globals class imported above; the functions in
# this module read and write the shared settings through this name.
globals_ = globals
8+
9+
def getCredentials(section):
    """Load the login data stored under `section` in the '.credentials' file.

    The 'username', 'password' and 'url' options of that section are copied
    into globals_.USERNAME, globals_.PASSWD and globals_.URL.

    Returns True once the three values have been stored.
    Raises NoSectionError when the file has no section with that name.
    """
    config_ = ConfigParser()

    # Close the file as soon as it has been parsed
    with open('.credentials') as credentials:
        config_.readfp(credentials)

    if not config_.has_section(section):
        # NoSectionError takes the section name and builds its own message
        raise NoSectionError(section)

    globals_.USERNAME = config_.get(section, 'username')
    globals_.PASSWD = config_.get(section, 'password')
    globals_.URL = config_.get(section, 'url')
    return True
18+
19+
def getVal(config, section, item):
    """Return the value of option `item` in `section` of `config`.

    `config` must be a ConfigParser instance; anything else raises
    TypeError. Raises NoOptionError when the option cannot be found in the
    given section.
    """
    # isinstance also works where ConfigParser is an old-style class, for
    # which comparing type(config) against the class never succeeds.
    if not isinstance(config, ConfigParser):
        raise TypeError('config must be a ConfigParser instance')

    if config.has_option(section, item):
        return config.get(section, item)

    # NoOptionError takes the option and section names and builds the message
    raise NoOptionError(item, section)
26+
27+
if __name__ == '__main__':
    # Quick manual check: load one section and show what was read.
    # getCredentials reports a missing section by raising, so its return
    # value is not compared against anything here.
    getCredentials('clubsuperblackbird')
    print('%s %s' % (globals_.USERNAME, globals_.PASSWD))

credentials

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
[section]
2+
username=<username>
3+
password=<password>
4+
url=<url>
5+

globals.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
# -*- coding: utf-8 -*-
2+
3+
""" This class will keep global variables all together in a single point """
4+
class globals:
    """Single holder for the settings shared between the scripts."""

    # Login data; empty until assigned at run time
    USERNAME = PASSWD = URL = ''

    # Administration TID; empty by default
    TID = ''

    # User-agent string sent with the requests
    USERAGENT = 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:14.0) Gecko/20100101 Firefox/14.0.1'

0 commit comments

Comments
 (0)