11# -*- coding: utf-8 -*-
22from __future__ import print_function
33import sys
4+ import os
5+ import re
46import argparse
57from waybackpy .wrapper import Url
68from waybackpy .__version__ import __version__
@@ -31,6 +33,36 @@ def _near(obj, args):
3133 _near_args ["minute" ] = args .minute
3234 return (obj .near (** _near_args ))
3335
36+ def _known_urls (obj , args ):
37+ sd = False
38+ al = False
39+ if args .subdomain :
40+ sd = True
41+ if args .alive :
42+ al = True
43+ url_list = obj .known_urls (alive = al , subdomain = sd )
44+ total_urls = len (url_list )
45+
46+ if total_urls > 0 :
47+ m = re .search ('https?://([A-Za-z_0-9.-]+).*' , url_list [0 ])
48+ if m :
49+ domain = m .group (1 )
50+ else :
51+ domain = "waybackpy-known"
52+ dir_path = os .path .abspath (os .getcwd ())
53+ file_name = dir_path + "/%s-%d-urls.txt" % (domain , total_urls )
54+ text = "\n " .join (url_list ) + "\n "
55+ with open (file_name , "a+" ) as f :
56+ f .write (text )
57+ text = text + "%d URLs found and saved in ./%s-%d-urls.txt" % (
58+ total_urls , domain , total_urls
59+ )
60+
61+ else :
62+ text = "No known URLs found. Please try a diffrent domain!"
63+
64+ return text
65+
3466def _get (obj , args ):
3567 if args .get .lower () == "url" :
3668 return (obj .get ())
@@ -52,10 +84,10 @@ def _get(obj, args):
5284
5385def args_handler (args ):
5486 if args .version :
55- return (__version__ )
87+ return ("waybackpy version %s" % __version__ )
5688
5789 if not args .url :
58- return ("Specify an URL. See --help for help using waybackpy." )
90+ return ("waybackpy %s \n See 'waybackpy --help' for help using this tool." % __version__ )
5991
6092 if args .user_agent :
6193 obj = Url (args .url , args .user_agent )
@@ -72,26 +104,54 @@ def args_handler(args):
72104 return _total_archives (obj )
73105 if args .near :
74106 return _near (obj , args )
107+ if args .known_urls :
108+ return _known_urls (obj , args )
75109 if args .get :
76110 return _get (obj , args )
77- return ("Usage: waybackpy --url [ URL] --user_agent [USER AGENT] [OPTIONS]. See --help for help using waybackpy ." )
111+ return ("You only specified the URL. But you also need to specify the operation. \n See 'waybackpy --help' for help using this tool ." )
78112
def parse_args(argv):
    """Parse the waybackpy command-line arguments.

    Options are organised into titled argument groups so that ``--help``
    output is sectioned by operation.  No option is enforced as required
    by the parser itself; a missing ``--url`` is left for the caller to
    handle.

    :param argv: full argument vector including the program name at index
        0 (as in ``sys.argv``); only ``argv[1:]`` is parsed.
    :return: the ``argparse.Namespace`` holding the parsed options.
    """
    parser = argparse.ArgumentParser()

    url_group = parser.add_argument_group("URL argument (required)")
    url_group.add_argument(
        "--url", "-u",
        help="URL on which Wayback machine operations would occur",
    )

    user_agent_group = parser.add_argument_group("User Agent")
    user_agent_group.add_argument(
        "--user_agent", "-ua",
        help="User agent, default user_agent is "
             "\"waybackpy python package - https://github.com/akamhy/waybackpy\"",
    )

    save_group = parser.add_argument_group("Create new archive/save URL")
    save_group.add_argument(
        "--save", "-s", action="store_true",
        help="Save the URL on the Wayback machine",
    )

    oldest_group = parser.add_argument_group("Oldest archive")
    oldest_group.add_argument(
        "--oldest", "-o", action="store_true",
        help="Oldest archive for the specified URL",
    )

    newest_group = parser.add_argument_group("Newest archive")
    newest_group.add_argument(
        "--newest", "-n", action="store_true",
        help="Newest archive for the specified URL",
    )

    total_group = parser.add_argument_group("Total number of archives")
    total_group.add_argument(
        "--total", "-t", action="store_true",
        help="Total number of archives for the specified URL",
    )

    get_group = parser.add_argument_group("Get source code")
    get_group.add_argument(
        "--get", "-g",
        help="Prints the source code of the supplied url. "
             "Use '--get help' for extended usage",
    )

    known_urls_group = parser.add_argument_group(
        "URLs known and archived to Wayback Machine for the site."
    )
    known_urls_group.add_argument(
        "--known_urls", "-ku", action="store_true",
        help="URLs known for the domain.",
    )
    known_urls_group.add_argument(
        "--subdomain", "-sub", action="store_true",
        help="Use with '--known_urls' to include known URLs for subdomains.",
    )
    known_urls_group.add_argument(
        "--alive", "-a", action="store_true",
        help="Only include live URLs. Will not include dead links.",
    )

    near_group = parser.add_argument_group("Archive close to time specified")
    near_group.add_argument(
        "--near", "-N", action="store_true",
        help="Archive near specified time",
    )

    near_time_group = parser.add_argument_group(
        "Arguments that are used only with --near"
    )
    near_time_group.add_argument(
        "--year", "-Y", type=int, help="Year in integer"
    )
    near_time_group.add_argument(
        "--month", "-M", type=int, help="Month in integer"
    )
    near_time_group.add_argument(
        "--day", "-D", type=int, help="Day in integer"
    )
    near_time_group.add_argument(
        "--hour", "-H", type=int, help="Hour in integer"
    )
    near_time_group.add_argument(
        "--minute", "-MIN", type=int, help="Minute in integer"
    )

    parser.add_argument(
        "--version", "-v", action="store_true", help="Waybackpy version"
    )

    # argv[0] is the program name; argparse must only see the options.
    return parser.parse_args(argv[1:])
96156
97157def main (argv = None ):
0 commit comments