commit 2b46e8899fdf08b6c67275ef152c1f15b5ea020d
parent 8c4421108b6b070a146ec2990d1f3e93dd7d5953
Author: xfnw <xfnw@thunix.net>
Date: Tue, 15 Dec 2020 10:17:28 -0500

set a useragent

Diffstat:
M urls.sh | 12 +++++++++---
1 file changed, 9 insertions(+), 3 deletions(-)
diff --git a/urls.sh b/urls.sh
@@ -1,3 +1,9 @@
-wget --spider --force-html -r -l1 -H $@ 2>&1 \
- | grep '^--' | awk '{ print $3 }' \
- | grep -v '\.\(css\|js\|png\|gif\|jpg\|txt\|ico\|ttf\|svg\)$'
+wget --spider --force-html -r -l1 -H -U 'searplbot/1.0' $@ 2>&1 | tee wg
+
+grep '^--' wg | awk '{ print $3 }' \
+ | grep -v '\.\(css\|js\|png\|gif\|jpg\|txt\|ico\|ttf\|svg\)$' \
+ | tee ur
+
+sleep 10
+
+php crawl.php $(cat ur)