commit 8c4421108b6b070a146ec2990d1f3e93dd7d5953
parent 75be84f895e05f4c51caa3fcadb0d5fe21b28404
Author: xfnw <xfnw@thunix.net>
Date: Tue, 15 Dec 2020 09:59:19 -0500
Don't track SVGs, and drop the trailing / from URLs so they won't be duplicated
Diffstat:
2 files changed, 2 insertions(+), 1 deletion(-)
diff --git a/crawl.php b/crawl.php
@@ -28,6 +28,7 @@ $arg = $argv;
array_shift($arg);
foreach ($arg as $url) {
+ $url = preg_replace('/\/$/','',$url);
$file = file_get_contents($url);
if (!$file)
continue;
diff --git a/urls.sh b/urls.sh
@@ -1,3 +1,3 @@
wget --spider --force-html -r -l1 -H $@ 2>&1 \
| grep '^--' | awk '{ print $3 }' \
- | grep -v '\.\(css\|js\|png\|gif\|jpg\|txt\|ico\|ttf\)$'
+ | grep -v '\.\(css\|js\|png\|gif\|jpg\|txt\|ico\|ttf\|svg\)$'