summary refs log tree commit diff stats
path: root/src/raptor_rss.h
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r-- src/raptor_rss.h 448
1 files changed, 448 insertions, 0 deletions
diff --git a/src/raptor_rss.h b/src/raptor_rss.h
new file mode 100644
index 0000000..0e53369
--- /dev/null
+++ b/src/raptor_rss.h
@@ -0,0 +1,448 @@
+/* -*- Mode: c; c-basic-offset: 2 -*-
+ *
+ * raptor_rss.h - Redland Parser Toolkit Internal RSS Model and API
+ *
+ * Copyright (C) 2004-2008, David Beckett http://www.dajobe.org/
+ * Copyright (C) 2004-2005, University of Bristol, UK http://www.bristol.ac.uk/
+ *
+ * This package is Free Software and part of Redland http://librdf.org/
+ *
+ * It is licensed under the following three licenses as alternatives:
+ * 1. GNU Lesser General Public License (LGPL) V2.1 or any newer version
+ * 2. GNU General Public License (GPL) V2 or any newer version
+ * 3. Apache License, V2.0 or any newer version
+ *
+ * You may not use this file except in compliance with at least one of
+ * the above three licenses.
+ *
+ * See LICENSE.html or LICENSE.txt at the top of this package for the
+ * complete terms and further detail along with the license texts for
+ * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively.
+ *
+ *
+ */
+
+
+
+#ifndef RAPTOR_RSS_H
+#define RAPTOR_RSS_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+typedef enum { /* kinds of typed nodes; no explicit values, so they follow declaration order from 0 */
+/* CONTAINERs */
+ /* common */
+ RAPTOR_RSS_CHANNEL,
+ RAPTOR_RSS_IMAGE,
+ RAPTOR_RSS_TEXTINPUT,
+
+ /* list items */
+ RAPTOR_RSS_ITEM,
+
+ /* atom author */
+ RAPTOR_ATOM_AUTHOR,
+ /* atom link */
+ RAPTOR_ATOM_LINK,
+
+ /* itunes owner */
+ RAPTOR_ITUNES_OWNER,
+
+ /* containers but IGNORED */
+ RAPTOR_RSS_SKIPHOURS,
+ RAPTOR_RSS_SKIPDAYS,
+
+/* metadata BLOCKs */
+ RAPTOR_RSS_ENCLOSURE,
+ RAPTOR_ATOM_CATEGORY,
+ RAPTOR_RSS_SOURCE,
+
+/* serializing containers */
+ RAPTOR_ATOM_FEED,
+ RAPTOR_ATOM_ENTRY,
+
+ /* nothing found yet */
+ RAPTOR_RSS_NONE,
+
+ /* number of types declared before NONE (deliberately not counting NONE) */
+ RAPTOR_RSS_COMMON_SIZE = RAPTOR_RSS_NONE - RAPTOR_RSS_CHANNEL,
+ RAPTOR_RSS_COMMON_IGNORED = RAPTOR_RSS_SKIPHOURS /* first of the IGNORED containers above */
+} raptor_rss_type;
+
+
+/* Namespace URI strings for the RSS, Atom, DC, RDF, iTunes and related vocabularies */
+#define RSS1_0_NAMESPACE_URI "http://purl.org/rss/1.0/"
+#define RSS0_91_NAMESPACE_URI "http://purl.org/rss/1.0/modules/rss091#"
+#define RSS2_0_ENC_NAMESPACE_URI "http://purl.oclc.org/net/rss_2.0/enc#"
+#define ATOM0_3_NAMESPACE_URI "http://purl.org/atom/ns#"
+#define DC_NAMESPACE_URI "http://purl.org/dc/elements/1.1/"
+#define RSS1_1_NAMESPACE_URI "http://purl.org/net/rss1.1#"
+#define CONTENT_NAMESPACE_URI "http://purl.org/rss/1.0/modules/content/"
+#define ATOM1_0_NAMESPACE_URI "http://www.w3.org/2005/Atom"
+#define RDF_NAMESPACE_URI "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+#define ATOMTRIPLES_NAMESPACE_URI "http://purl.org/syndication/atomtriples/1"
+#define ITUNES_NAMESPACE_URI "http://www.itunes.com/dtds/podcast-1.0.dtd"
+
+/* Old Netscape RSS 0.9 namespace, turned into RSS 1.0 */
+#define RSS0_9_NAMESPACE_URI "http://my.netscape.com/rdf/simple/0.9/"
+
+typedef enum { /* namespace identifiers; RAPTOR_RSS_NAMESPACES_SIZE is the length of raptor_rss_namespaces_info[] */
+ RSS_UNKNOWN_NS = 0,
+ RSS_NO_NS = 1,
+ RSS0_91_NS = 2,
+ RSS0_9_NS = 3,
+ RSS0_92_NS = RSS_NO_NS, /* alias: same value as RSS_NO_NS */
+ RSS2_0_NS = RSS_NO_NS, /* alias: same value as RSS_NO_NS */
+ RSS1_0_NS = 4,
+ ATOM0_3_NS = 5,
+ DC_NS = 6,
+ RSS2_0_ENC_NS = 7,
+ RSS1_1_NS = 8,
+ CONTENT_NS = 9,
+ ATOM1_0_NS = 10,
+ RDF_NS = 11,
+ ATOMTRIPLES_NS = 12,
+ ITUNES_NS = 13,
+
+ RAPTOR_RSS_NAMESPACES_SIZE = ITUNES_NS + 1 /* one past the largest identifier above */
+} rss_info_namespace;
+
+
+typedef struct { /* description of one namespace */
+ const char *uri_string; /* namespace URI as a C string */
+ const char *prefix; /* namespace prefix string */
+} raptor_rss_namespace_info;
+
+
+extern const raptor_rss_namespace_info raptor_rss_namespaces_info[RAPTOR_RSS_NAMESPACES_SIZE]; /* defined outside this header; presumably indexed by rss_info_namespace - confirm */
+
+#define RAPTOR_RSS_INFO_FLAG_URI_VALUE 1 /* presumably a bit for raptor_rss_field_info.flags - confirm */
+#define RAPTOR_RSS_INFO_FLAG_BLOCK_VALUE 2 /* presumably a bit for raptor_rss_field_info.flags - confirm */
+
+/* Namespaced elements used in feeds: one record describes one field */
+typedef struct {
+ const char* name; /* element name */
+ rss_info_namespace nspace; /* namespace identifier for name */
+ int flags; /* presumably a combination of RAPTOR_RSS_INFO_FLAG_* - confirm */
+ raptor_rss_type block_type; /* presumably only meaningful with RAPTOR_RSS_INFO_FLAG_BLOCK_VALUE - confirm */
+} raptor_rss_field_info;
+
+/* Fields of typed nodes used in RSS; no explicit values, so they follow declaration order from 0 */
+typedef enum {
+ RAPTOR_RSS_FIELD_TITLE,
+ RAPTOR_RSS_FIELD_LINK,
+ RAPTOR_RSS_FIELD_DESCRIPTION,
+ RAPTOR_RSS_FIELD_URL, /* image */
+ RAPTOR_RSS_FIELD_NAME, /* textinput */
+ RAPTOR_RSS_FIELD_LANGUAGE, /* channel 0.91 */
+ RAPTOR_RSS_FIELD_RATING, /* channel 0.91 */
+ RAPTOR_RSS_FIELD_COPYRIGHT, /* channel 0.91 */
+ RAPTOR_RSS_FIELD_PUBDATE, /* channel 0.91, item 2.0 */
+ RAPTOR_RSS_FIELD_LASTBUILDDATE, /* channel 0.91 */
+ RAPTOR_RSS_FIELD_DOCS, /* channel 0.91 */
+ RAPTOR_RSS_FIELD_MANAGINGEDITOR,/* channel 0.91 */
+ RAPTOR_RSS_FIELD_WEBMASTER, /* channel 0.91 */
+ RAPTOR_RSS_FIELD_CLOUD, /* channel 0.92, 2.0 */
+ RAPTOR_RSS_FIELD_TTL, /* channel 2.0 */
+ RAPTOR_RSS_FIELD_WIDTH, /* image 0.91 */
+ RAPTOR_RSS_FIELD_HEIGHT, /* image 0.91 */
+ RAPTOR_RSS_FIELD_HOUR, /* skipHours 0.91 */
+ RAPTOR_RSS_FIELD_DAY, /* skipDays 0.91 */
+ RAPTOR_RSS_FIELD_GENERATOR, /* channel 0.92, 2.0 */
+ RAPTOR_RSS_FIELD_SOURCE, /* item 0.92, 2.0 */
+ RAPTOR_RSS_FIELD_AUTHOR, /* item 2.0 */
+ RAPTOR_RSS_FIELD_GUID, /* item 2.0 */
+ RAPTOR_RSS_FIELD_ENCLOSURE, /* item 0.92, 2.0 */
+ RAPTOR_RSS_RDF_ENCLOSURE, /* In RDF output, not an RSS field */
+ RAPTOR_RSS_RDF_ENCLOSURE_CLASS, /* In RDF output, not an RSS field */
+ RAPTOR_RSS_RDF_ENCLOSURE_URL, /* In RDF output, not an RSS field */
+ RAPTOR_RSS_RDF_ENCLOSURE_LENGTH, /* In RDF output, not an RSS field */
+ RAPTOR_RSS_RDF_ENCLOSURE_TYPE, /* In RDF output, not an RSS field */
+ RAPTOR_RSS_FIELD_LENGTH, /* item 0.92, 2.0 */
+ RAPTOR_RSS_FIELD_TYPE, /* item 0.92, 2.0 */
+ RAPTOR_RSS_FIELD_CATEGORY, /* item 0.92, 2.0, channel 2.0 */
+ RAPTOR_RSS_FIELD_COMMENTS, /* comments v? */
+ RAPTOR_RSS_FIELD_ITEMS, /* rss 1.0 items */
+ RAPTOR_RSS_FIELD_IMAGE, /* rss 1.0 property from channel->image */
+ RAPTOR_RSS_FIELD_TEXTINPUT, /* rss 1.0 property from channel->textinput */
+
+ RAPTOR_RSS_FIELD_ATOM_COPYRIGHT, /* atom 0.3 copyright */
+ RAPTOR_RSS_FIELD_ATOM_CREATED, /* atom 0.3 created */
+ RAPTOR_RSS_FIELD_ATOM_ISSUED, /* atom 0.3 issued */
+ RAPTOR_RSS_FIELD_ATOM_MODIFIED, /* atom 0.3 modified */
+ RAPTOR_RSS_FIELD_ATOM_TAGLINE, /* atom 0.3 tagline */
+
+ /* atom 1.0 required fields */
+ RAPTOR_RSS_FIELD_ATOM_ID, /* atom 1.0 id */
+ RAPTOR_RSS_FIELD_ATOM_TITLE, /* atom 1.0 title */
+ RAPTOR_RSS_FIELD_ATOM_UPDATED, /* atom 1.0 updated */
+ /* atom 1.0 optional fields */
+ RAPTOR_RSS_FIELD_ATOM_AUTHOR, /* atom 1.0 author */
+ RAPTOR_RSS_FIELD_ATOM_CATEGORY, /* atom 1.0 category */
+ RAPTOR_RSS_FIELD_ATOM_CONTENT, /* atom 1.0 content */
+ RAPTOR_RSS_FIELD_ATOM_CONTRIBUTOR, /* atom 1.0 contributor */
+ RAPTOR_RSS_FIELD_ATOM_EMAIL, /* atom 1.0 email */
+ RAPTOR_RSS_FIELD_ATOM_ENTRY, /* atom 1.0 entry */
+ RAPTOR_RSS_FIELD_ATOM_FEED, /* atom 1.0 feed */
+ RAPTOR_RSS_FIELD_ATOM_GENERATOR, /* atom 1.0 generator */
+ RAPTOR_RSS_FIELD_ATOM_ICON, /* atom 1.0 icon */
+ RAPTOR_RSS_FIELD_ATOM_LINK, /* atom 1.0 link */
+ RAPTOR_RSS_FIELD_ATOM_LOGO, /* atom 1.0 logo */
+ RAPTOR_RSS_FIELD_ATOM_NAME, /* atom 1.0 name */
+ RAPTOR_RSS_FIELD_ATOM_PUBLISHED, /* atom 1.0 published */
+ RAPTOR_RSS_FIELD_ATOM_RIGHTS, /* atom 1.0 rights */
+ RAPTOR_RSS_FIELD_ATOM_SOURCE, /* atom 1.0 source */
+ RAPTOR_RSS_FIELD_ATOM_SUBTITLE, /* atom 1.0 subtitle */
+ RAPTOR_RSS_FIELD_ATOM_SUMMARY, /* atom 1.0 summary */
+ RAPTOR_RSS_FIELD_ATOM_URI, /* atom 1.0 uri */
+
+ RAPTOR_RSS_RDF_ATOM_AUTHOR_CLASS, /* In RDF output, not atom field */
+ RAPTOR_RSS_RDF_ATOM_CATEGORY_CLASS, /* In RDF output, not atom field */
+ RAPTOR_RSS_RDF_ATOM_LINK_CLASS, /* In RDF output, not atom field */
+
+ RAPTOR_RSS_FIELD_ATOM_LABEL, /* atom 1.0 attribute label */
+ RAPTOR_RSS_FIELD_ATOM_SCHEME, /* atom 1.0 attribute scheme */
+ RAPTOR_RSS_FIELD_ATOM_TERM, /* atom 1.0 attribute term */
+ RAPTOR_RSS_FIELD_ATOM_HREF, /* atom 1.0 attribute href */
+ RAPTOR_RSS_FIELD_ATOM_REL, /* atom 1.0 attribute rel */
+ RAPTOR_RSS_FIELD_ATOM_TYPE, /* atom 1.0 attribute type */
+ RAPTOR_RSS_FIELD_ATOM_HREFLANG, /* atom 1.0 attribute hreflang */
+ RAPTOR_RSS_FIELD_ATOM_LENGTH, /* atom 1.0 attribute length */
+
+ RAPTOR_RSS_FIELD_DC_TITLE, /* DC title */
+ RAPTOR_RSS_FIELD_DC_CONTRIBUTOR, /* DC contributor */
+ RAPTOR_RSS_FIELD_DC_CREATOR, /* DC creator */
+ RAPTOR_RSS_FIELD_DC_PUBLISHER, /* DC publisher */
+ RAPTOR_RSS_FIELD_DC_SUBJECT, /* DC subject */
+ RAPTOR_RSS_FIELD_DC_DESCRIPTION, /* DC description */
+ RAPTOR_RSS_FIELD_DC_DATE, /* DC date */
+ RAPTOR_RSS_FIELD_DC_TYPE, /* DC type */
+ RAPTOR_RSS_FIELD_DC_FORMAT, /* DC format */
+ RAPTOR_RSS_FIELD_DC_IDENTIFIER, /* DC identifier */
+ RAPTOR_RSS_FIELD_DC_LANGUAGE, /* DC language */
+ RAPTOR_RSS_FIELD_DC_RELATION, /* DC relation */
+ RAPTOR_RSS_FIELD_DC_SOURCE, /* DC source */
+ RAPTOR_RSS_FIELD_DC_COVERAGE, /* DC coverage */
+ RAPTOR_RSS_FIELD_DC_RIGHTS, /* DC rights */
+
+
+ RAPTOR_RSS_FIELD_CONTENT_ENCODED, /* rss 1.0 module content:encoded */
+
+ RAPTOR_RSS_FIELD_AT_CONTENT_TYPE, /* at:contentType */
+
+ RAPTOR_RSS_FIELD_ITUNES_AUTHOR,
+ RAPTOR_RSS_FIELD_ITUNES_SUBTITLE,
+ RAPTOR_RSS_FIELD_ITUNES_SUMARY, /* (sic) "SUMARY"; spelling kept because the identifier may be used outside this header */
+ RAPTOR_RSS_FIELD_ITUNES_KEYWORDS,
+ RAPTOR_RSS_FIELD_ITUNES_EXPLICIT,
+ RAPTOR_RSS_FIELD_ITUNES_IMAGE,
+ RAPTOR_RSS_FIELD_ITUNES_NAME,
+ RAPTOR_RSS_FIELD_ITUNES_OWNER,
+ RAPTOR_RSS_FIELD_ITUNES_BLOCK,
+ RAPTOR_RSS_FIELD_ITUNES_CATEGORY,
+ RAPTOR_RSS_FIELD_ITUNES_EMAIL,
+
+ RAPTOR_RSS_FIELD_UNKNOWN,
+
+ RAPTOR_RSS_FIELD_NONE,
+
+ RAPTOR_RSS_FIELDS_SIZE = RAPTOR_RSS_FIELD_UNKNOWN /* number of fields declared before UNKNOWN */
+} raptor_rss_fields_type;
+
+extern const raptor_rss_field_info raptor_rss_fields_info[RAPTOR_RSS_FIELDS_SIZE+2]; /* two entries more than RAPTOR_RSS_FIELDS_SIZE; presumably for UNKNOWN and NONE - confirm */
+
+typedef struct raptor_rss_field_s raptor_rss_field; /* forward declaration so the typedef below can name it */
+
+typedef int (*raptor_rss_field_conversion)(raptor_rss_field* from_field, raptor_rss_field* to_field); /* converter between two fields; the meaning of the int result is not shown in this header */
+
+typedef struct { /* pairs a source field type with a target field type */
+ raptor_rss_fields_type from;
+ raptor_rss_fields_type to;
+ raptor_rss_field_conversion conversion; /* NOTE(review): possibly NULL when no conversion is needed - confirm */
+} raptor_field_pair;
+
+extern const raptor_field_pair raptor_atom_to_rss[]; /* unsized here; its length or terminator is set by the definition elsewhere */
+
+
+#define RAPTOR_RSS_LINK_HREF_URL_OFFSET 0 /* link @href: urls[0] per the raptor_rss_block_s comment */
+#define RAPTOR_RSS_LINK_REL_STRING_OFFSET 0 /* NOTE(review): the raptor_rss_block_s comment lists link @rel at strings[2], not 0 - confirm which is right */
+
+#define RSS_BLOCK_FIELD_TYPE_URL 0 /* attribute interpreted as a URL */
+#define RSS_BLOCK_FIELD_TYPE_STRING 1 /* attribute interpreted as a string */
+
+#define RSS_BLOCK_MAX_URLS 1 /* length of raptor_rss_block_s.urls[] */
+#define RSS_BLOCK_MAX_STRINGS 5 /* length of raptor_rss_block_s.strings[] */
+
+/* Feed metadata blocks support (was raptor_rss_enclosure); blocks chain into a singly linked list through next */
+struct raptor_rss_block_s
+{
+ raptor_rss_type rss_type; /* kind of block, a raptor_rss_type value */
+
+ /* enclosure: subject node URI/blank node */
+ raptor_term *identifier;
+
+ /* enclosure: node RAPTOR_RSS_ENCLOSURE
+ category: node RAPTOR_ATOM_CATEGORY
+ person: node RAPTOR_ATOM_AUTHOR or RAPTOR_ATOM_CONTRIBUTOR
+ link: node RAPTOR_ATOM_LINK
+ */
+ raptor_uri *node_type;
+
+ /* enclosure: 0: where enclosure is located - @url attr (required)
+ atom category: 0: @scheme attr (optional)
+ rss category: 0: @domain attr (optional)
+ rss source: 0: @url attr (required)
+ person: 0: @atom:uri attr (optional)
+ link: 0: @href attr (required)
+ */
+ raptor_uri *urls[RSS_BLOCK_MAX_URLS];
+
+ /* enclosure: 0: content length @length attr (required)
+ 1: content type @type attr (required)
+ atom category: 0: @term attr (required)
+ 1: @label attr (optional)
+ person: 0: @atom:name attr (required)
+ 1: @atom:email attr (optional)
+ link: 0: @length attr (optional)
+ 1: @type attr (optional)
+ 2: @rel attr (optional)
+ 3: @hreflang attr (optional)
+ 4: @title attr (optional)
+ */
+ char *strings[RSS_BLOCK_MAX_STRINGS];
+
+ /* next block in the list */
+ struct raptor_rss_block_s* next;
+};
+typedef struct raptor_rss_block_s raptor_rss_block; /* shorthand name for the struct above */
+
+#define RAPTOR_RSS_ITEM_CONTAINER 1 /* presumably a raptor_rss_item_info.flags value - confirm */
+#define RAPTOR_RSS_ITEM_BLOCK 2 /* presumably a raptor_rss_item_info.flags value - confirm */
+typedef struct { /* description of one kind of item */
+ const char* name; /* item name */
+ rss_info_namespace nspace; /* namespace identifier for name */
+ int flags;
+ /* RDF class URI, given as a field type */
+ raptor_rss_fields_type cls;
+ /* RDF predicate URI to connect to the instance of this item */
+ raptor_rss_fields_type predicate;
+} raptor_rss_item_info;
+
+
+extern const raptor_rss_item_info raptor_rss_items_info[RAPTOR_RSS_COMMON_SIZE+1]; /* one entry more than RAPTOR_RSS_COMMON_SIZE; presumably indexed by raptor_rss_type - confirm */
+
+#define RAPTOR_RSS_BLOCKS_SIZE 17 /* raptor_rss_block_fields_info[] is declared with this many entries plus one */
+/* Metadata blocks info: one record maps one XML attribute of a block type to a storage slot and a field */
+typedef struct {
+ /* metadata block type it applies to */
+ raptor_rss_type type;
+ /* XML attribute (or NULL for field to use to store CDATA) */
+ const char *attribute;
+ /* How that attribute should be interpreted: RSS_BLOCK_FIELD_TYPE_URL or RSS_BLOCK_FIELD_TYPE_STRING */
+ int attribute_type;
+ /* Index into the raptor_rss_block_s urls/strings array to store it */
+ int offset;
+ /* RDF predicate this maps to */
+ raptor_rss_fields_type field;
+} raptor_rss_block_field_info;
+
+extern const raptor_rss_block_field_info raptor_rss_block_fields_info[RAPTOR_RSS_BLOCKS_SIZE+1]; /* presumably the extra entry is a terminator - confirm */
+
+
+struct raptor_rss_field_s /* one field value; fields chain into a list through next */
+{
+ raptor_world* world;
+ unsigned char* value; /* field content as bytes; see is_xml */
+ raptor_uri* uri; /* field content as a URI */
+ struct raptor_rss_field_s* next; /* next field in the list */
+ /* this field was mapped from another vocab */
+ unsigned int is_mapped:1;
+ /* value is XML */
+ unsigned int is_xml:1;
+};
+
+#define RAPTOR_RSS_FIELD_MAPPED /* defined with an empty expansion; its use is not visible in this header */
+
+/* RSS items (instances of typed nodes) containing fields; items chain into a list through next */
+struct raptor_rss_item_s
+{
+ raptor_world* world;
+ raptor_uri *uri;
+ raptor_term* term;
+ const raptor_rss_item_info *node_type; /* description record for this item's kind */
+ int node_typei; /* presumably the numeric raptor_rss_type matching node_type - confirm */
+ raptor_rss_field* fields[RAPTOR_RSS_FIELDS_SIZE]; /* RAPTOR_RSS_FIELDS_SIZE slots, presumably indexed by raptor_rss_fields_type; each slot can head a list via raptor_rss_field_s.next */
+ raptor_rss_block* blocks; /* head of this item's block list (linked via raptor_rss_block_s.next) */
+ int fields_count;
+ struct raptor_rss_item_s* next; /* next item in the list */
+ /* Triples with this item as subject and do not fit in @fields */
+ raptor_sequence* triples;
+};
+typedef struct raptor_rss_item_s raptor_rss_item; /* shorthand name for the struct above */
+
+
+/* raptor_rss_common.c */
+#define RAPTOR_RSS_N_CONCEPTS 1 /* length of raptor_rss_model.concepts[] */
+
+#define RAPTOR_RSS_RSS_items_URI(rss_model) ((rss_model)->concepts[0]) /* accessor for the single concept URI, concepts[0] */
+
+
+typedef struct { /* in-memory model of one feed: common nodes plus a list of items */
+ raptor_world* world;
+
+ /* RAPTOR_RSS_CHANNEL, RAPTOR_RSS_IMAGE, RAPTOR_RSS_TEXTINPUT */
+ raptor_rss_item* common[RAPTOR_RSS_COMMON_SIZE];
+
+ /* list of items RAPTOR_RSS_ITEM */
+ raptor_rss_item* items;
+
+ /* this points to the last one added, so we can append easily */
+ raptor_rss_item* last;
+
+ /* item count */
+ int items_count;
+
+ raptor_uri* concepts[RAPTOR_RSS_N_CONCEPTS]; /* read through RAPTOR_RSS_RSS_items_URI() */
+
+ raptor_namespace_stack *nstack;
+
+} raptor_rss_model;
+
+
+/* raptor_rss_common.c: prototypes for the model, item, block, field and date helpers */
+int raptor_rss_common_init(raptor_world* world);
+void raptor_rss_common_terminate(raptor_world* world);
+
+void raptor_rss_model_init(raptor_world* world, raptor_rss_model* rss_model);
+void raptor_rss_model_clear(raptor_rss_model* rss_model);
+
+raptor_rss_item* raptor_new_rss_item(raptor_world* world);
+int raptor_rss_model_add_item(raptor_rss_model* rss_model);
+raptor_rss_item* raptor_rss_model_add_common(raptor_rss_model* rss_model, raptor_rss_type type);
+raptor_rss_item* raptor_rss_model_get_common(raptor_rss_model* rss_model, raptor_rss_type type);
+
+void raptor_clear_rss_item(raptor_rss_item* item);
+void raptor_free_rss_item(raptor_rss_item* item);
+void raptor_rss_item_add_block(raptor_rss_item* item, raptor_rss_block *block);
+void raptor_rss_item_add_field(raptor_rss_item* item, int type, raptor_rss_field* field); /* type is a plain int; presumably a raptor_rss_fields_type value - confirm */
+int raptor_rss_item_equals_statement_subject(const raptor_rss_item *item, const raptor_statement *statement);
+int raptor_rss_item_set_uri(raptor_rss_item *item, raptor_uri* uri);
+
+raptor_rss_block *raptor_new_rss_block(raptor_world *world, raptor_rss_type rss_type, raptor_term* block_term);
+void raptor_free_rss_block(raptor_rss_block *block);
+
+raptor_rss_field* raptor_rss_new_field(raptor_world* world);
+void raptor_rss_field_free(raptor_rss_field* field);
+
+#define RAPTOR_ISO_DATE_LEN 20 /* presumably the length of "YYYY-MM-DDThh:mm:ssZ" (20 characters) - confirm */
+int raptor_rss_format_iso_date(char* buffer, size_t len, time_t unix_time); /* NOTE(review): this header has no #include, so size_t and time_t must come from the including file */
+int raptor_rss_set_date_field(raptor_rss_field* field, time_t unix_time);
+int raptor_rss_date_uplift(raptor_rss_field* to_field, const unsigned char *date_string);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif