diff options
author | Matthew Sotoudeh <matthewsot@outlook.com> | 2021-12-31 02:14:22 -0800 |
---|---|---|
committer | Matthew Sotoudeh <matthewsot@outlook.com> | 2021-12-31 02:14:22 -0800 |
commit | 5d2d64a898ec7694d4b5adcac6db4f6f0cea9b91 (patch) | |
tree | bdeede4d5d5eb341d9ae325029ad05fb839e9bb8 | |
parent | 1cfe6c8e5b1a88cc40c8b884332f1e8e6bac9bc0 (diff) |
Small fixes, better ascii filtering
-rw-r--r-- | audrey3/utils.rkt | 10 | ||||
-rw-r--r-- | sources/rss-source.rkt | 8 |
2 files changed, 14 insertions, 4 deletions
diff --git a/audrey3/utils.rkt b/audrey3/utils.rkt index fa7a6b6..ac85e11 100644 --- a/audrey3/utils.rkt +++ b/audrey3/utils.rkt @@ -26,9 +26,13 @@ (charterm-cursor 1 first-line) (clear-lines-here n-lines)) (define (strip-to-ascii str) - (string-replace - (string-replace (string-replace str "’" "'") "‘" "'") - "\n" " ")) + (list->string + (filter + (lambda (c) (and (char>=? c #\ ) (char<=? c #\~))) + (string->list + (string-replace + (string-replace (string-replace str "’" "'") "‘" "'") + "\n" " "))))) (define (string->hashed str) (bytes->string/locale (md5 str))) diff --git a/sources/rss-source.rkt b/sources/rss-source.rkt index 0c5b519..8f087b1 100644 --- a/sources/rss-source.rkt +++ b/sources/rss-source.rkt @@ -29,7 +29,9 @@ (rss-channel (se-path* '(channel title) xexpr) (se-path* '(channel description) xexpr) (filter (curry assoc "title") - (map parse-item (find-children 'item xexpr))))) + ; Youtube uses entry, not item + (append (map parse-item (find-children 'entry xexpr)) + (map parse-item (find-children 'item xexpr)))))) (define (parse-item xexpr) (parse-out-attrs (cddr xexpr))) @@ -49,6 +51,10 @@ #:when (and (string? url) (not (member 'url already-seen))) `(("url" . ,url) . ,(parse-out-attrs rest `(url . ,already-seen)))] + [`((link ,attrs) ,rest ...) + #:when (and (assoc 'href attrs) (not (member 'url already-seen))) + `(("url" . ,(cadr (assoc 'href attrs))) . + ,(parse-out-attrs rest `(url . ,already-seen)))] [`((,(or 'pubDate 'published) ,attrs ,date-str) ,rest ...) #:when (and (string? date-str) (not (member 'timestamp already-seen))) (let ([format (pick-format date-str)]) |