summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Matthew Sotoudeh <matthewsot@outlook.com> 2022-02-07 21:02:19 -0800
committer: Matthew Sotoudeh <matthewsot@outlook.com> 2022-02-07 21:02:19 -0800
commit: fb2865d4144ca80c9ebcb589f4692f27640f2e2f (patch)
tree: 781024e7c0d69a790a62025ade83895d5e0f60bf
parent: a136b96913f90570e945e62a9c04827b415f0fd7 (diff)
Rudimentary support for CDATA in the RSS parser
-rw-r--r-- sources/rss-source.rkt 25
1 files changed, 18 insertions, 7 deletions
diff --git a/sources/rss-source.rkt b/sources/rss-source.rkt
index 8f087b1..a2a83fc 100644
--- a/sources/rss-source.rkt
+++ b/sources/rss-source.rkt
@@ -20,14 +20,25 @@
(struct rss-channel (title description items) #:transparent) ; one parsed channel: title, description, and its list of parsed items
+(define (stringifyable xexpr) (if (string? xexpr) #t (cdata? xexpr))) ; #t for the two node kinds stringify accepts: plain strings and CDATA
+(define (stringify xexpr) ; plain string or CDATA node -> its text; anything else -> #f
+  (cond
+    [(string? xexpr) xexpr]
+    [(cdata? xexpr) (let* ([str (cdata-string xexpr)] ; presumably still wrapped as <![CDATA[...]]> -- confirm against the xml library docs
+                           [len (string-length str)]
+                           [prefix-len (string-length "<![CDATA[")] ; the CDATA opening delimiter ends in '[', not ']'
+                           [suffix-len (string-length "]]>")])
+                      (if (< len (+ prefix-len suffix-len)) str (substring str prefix-len (- len suffix-len))))] ; too short to carry both delimiters: return it untouched instead of letting substring raise
+    [else #f]))
+
(define (parse-rss xexpr)
  ; Feeds differ in layout: some (arXiv's <rdf>, for one) put items straight under the root,
  ; others nest them in <channel> elements. Parse the root itself, then every <channel> child.
  (let* ([top-level (parse-channel xexpr)] [nested (map parse-channel (find-children 'channel xexpr))])
    (cons top-level nested)))
(define (parse-channel xexpr)
- (rss-channel (se-path* '(channel title) xexpr)
- (se-path* '(channel description) xexpr)
+ (rss-channel (stringify (se-path* '(channel title) xexpr))
+ (stringify (se-path* '(channel description) xexpr))
(filter (curry assoc "title")
; Youtube uses entry, not item
(append (map parse-item (find-children 'entry xexpr))
@@ -40,12 +51,12 @@
(match xexprs
['() '()]
[`((title ,attrs ,title) ,rest ...)
- #:when (and (string? title) (not (member 'title already-seen)))
- `(("title" . ,title) .
- ,(parse-out-attrs rest `(title . already-seen)))]
+ #:when (and (stringifyable title) (not (member 'title already-seen)))
+ `(("title" . ,(stringify title)) .
+ ,(parse-out-attrs rest `(title . ,already-seen)))]
[`((description ,attrs ,descr) ,rest ...)
- #:when (and (string? descr) (not (member 'descr already-seen)))
- `(("description" . ,descr) .
+ #:when (and (stringifyable descr) (not (member 'descr already-seen)))
+ `(("description" . ,(stringify descr)) .
,(parse-out-attrs rest `(descr . ,already-seen)))]
[`((link ,attrs ,url) ,rest ...)
#:when (and (string? url) (not (member 'url already-seen)))
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback