commit be5b2503194d4395ff971ce77af3e72d8ee06704
parent 6b7002417d600c7d239ff74718d18a0666b3e571
Author: xfnw <xfnw@thunix.net>
Date: Tue, 15 Dec 2020 12:31:20 -0500

go through down sites faster

Diffstat:
M | urls.sh | | | 10 | ++++++++-- |
1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/urls.sh b/urls.sh
@@ -1,9 +1,15 @@
-wget --spider --force-html -r -l1 -H -U 'searplbot/1.0' $@ 2>&1 | tee wg
+wget --spider --force-html --tries 1 --timeout 1 -r -l1 -H -U 'searplbot/1.0' $@ 2>&1 | tee -a wg
 
 grep '^--' wg | awk '{ print $3 }' \
  | grep -v '\.\(css\|js\|png\|gif\|jpg\|txt\|ico\|ttf\|svg\)$' \
- | tee ur
+ | tee -a ur
+
+rm wg
 
 sleep 10
 
 php crawl.php $(cat ur | shuf)
+
+rm ur
+
+