mirror of
https://github.com/samuelclay/NewsBlur.git
synced 2025-08-05 16:58:59 +00:00
163 lines
4.9 KiB
Python
163 lines
4.9 KiB
Python
"""$Id: rdf.py 699 2006-09-25 02:01:18Z rubys $"""
|
|
|
|
__author__ = "Sam Ruby <http://intertwingly.net/> and Mark Pilgrim <http://diveintomark.org/>"
|
|
__version__ = "$Revision: 699 $"
|
|
__date__ = "$Date: 2006-09-25 02:01:18 +0000 (Mon, 25 Sep 2006) $"
|
|
__copyright__ = "Copyright (c) 2002 Sam Ruby and Mark Pilgrim"
|
|
|
|
from base import validatorBase
|
|
from logging import *
|
|
from validators import rdfAbout, noduplicates, text, eater
|
|
from root import rss11_namespace as rss11_ns
|
|
from extension import extension_everywhere
|
|
|
|
# Namespace URI of the RDF syntax vocabulary. It is paired with a local name
# to form the (namespace, name) attribute keys looked up further down in this
# module, namely (rdfNS, "parseType") and (rdfNS, "about").
rdfNS = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
|
|
|
#
|
|
# rdf:RDF element. The valid children include "channel", "item", "textinput", "image"
|
|
#
|
|
class rdf(validatorBase, object):
    """Validator for the rdf:RDF element.

    The valid children include "channel", "item", "textinput" and "image".
    Each ``do_<name>`` method returns the validator, or tuple of validators,
    to apply to the child element of that name.
    """

    def do_rss090_channel(self):
        """Return validators for an RSS 0.90 channel.

        As a side effect the RSS 0.9 namespace is appended to the
        dispatcher's default namespaces, which is what ``_is_090`` checks.
        """
        from channel import channel
        default_namespaces = self.dispatcher.defaultNamespaces
        default_namespaces.append("http://my.netscape.com/rdf/simple/0.9/")
        handlers = (channel(), noduplicates())
        return handlers

    def do_channel(self):
        """Return validators for a channel: rdf:about, RSS 1.0 channel, no duplicates."""
        from channel import rss10Channel
        handlers = (rdfAbout(), rss10Channel(), noduplicates())
        return handlers

    def _is_090(self):
        """Return True when the RSS 0.9 namespace is among the default namespaces."""
        known = self.dispatcher.defaultNamespaces
        return "http://my.netscape.com/rdf/simple/0.9/" in known

    def _withAbout(self, v):
        """Return ``v`` alone for RSS 0.9, otherwise ``v`` paired with ``rdfAbout()``."""
        if not self._is_090():
            return v, rdfAbout()
        return v

    def do_item(self):
        """Return validators for an item element."""
        from item import rss10Item
        item_validator = rss10Item()
        return self._withAbout(item_validator)

    def do_textinput(self):
        """Return validators for a textinput element."""
        from textInput import textInput
        input_validator = textInput()
        return self._withAbout(input_validator)

    def do_image(self):
        """Return validators for an image element."""
        image_validator = rss10Image()
        return self._withAbout(image_validator)

    def do_cc_License(self):
        """Hand cc:License to ``eater``."""
        return eater()

    def do_taxo_topic(self):
        """Hand taxo:topic to ``eater``."""
        return eater()

    def do_rdf_Description(self):
        """Hand rdf:Description to ``eater``."""
        return eater()

    def prevalidate(self):
        """Mark the feed as RSS 1 before validation starts."""
        self.setFeedType(TYPE_RSS1)

    def validate(self):
        """Log MissingElement when neither kind of channel child was seen."""
        seen = self.children
        has_channel = "channel" in seen or "rss090_channel" in seen
        if not has_channel:
            # Element names use '_' in place of ':'; restore the prefix form
            # for the reported parent name.
            parent_name = self.name.replace('_', ':')
            self.log(MissingElement({"parent": parent_name, "element": "channel"}))
|
|
|
|
from validators import rfc2396_full
|
|
|
|
class rss10Image(validatorBase, extension_everywhere):
    """Validator for the image element of an RSS 1.0 document.

    Requires "title", "link" and "url" children, and supplies validators
    for those plus a few dc:/cc: extension children.
    """

    def validate(self):
        """Log one event for each required child that is absent."""
        seen = self.children
        if "title" not in seen:
            self.log(MissingTitle({"parent": self.name, "element": "title"}))
        if "link" not in seen:
            self.log(MissingLink({"parent": self.name, "element": "link"}))
        if "url" not in seen:
            self.log(MissingElement({"parent": self.name, "element": "url"}))

    def do_title(self):
        """Return validators for title; duplicates are rejected."""
        from image import title
        handlers = (title(), noduplicates())
        return handlers

    def do_link(self):
        """Return validators for link; duplicates are rejected."""
        handlers = (rfc2396_full(), noduplicates())
        return handlers

    def do_url(self):
        """Return validators for url; duplicates are rejected."""
        handlers = (rfc2396_full(), noduplicates())
        return handlers

    def do_dc_creator(self):
        """Return the text validator for dc:creator."""
        return text()

    def do_dc_subject(self):
        """Return the text validator for dc:subject."""
        # duplicates allowed, hence no noduplicates() here
        return text()

    def do_dc_date(self):
        """Return validators for dc:date; duplicates are rejected."""
        from validators import w3cdtf
        handlers = (w3cdtf(), noduplicates())
        return handlers

    def do_cc_license(self):
        """Hand cc:license to ``eater``."""
        return eater()
|
|
|
|
#
|
|
# This class performs RSS 1.x specific validations on extensions.
|
|
#
|
|
class rdfExtension(validatorBase):
    """Perform RSS 1.x specific validations on extension elements.

    An instance is pushed for every child element it encounters (see
    ``startElementNS``), so a whole extension subtree is handled by
    instances of this class.

    Attributes set here:
      qname   -- the value passed by the creator; compared against the
                 RSS 1.1 namespace, so presumably a namespace URI
                 (TODO confirm against callers).
      literal -- True once rdf:parseType="Literal" is in effect for this
                 element or was inherited from the element that pushed it.
    """

    def __init__(self, qname, literal=False):
        """Store ``qname`` and the inherited ``literal`` flag."""
        validatorBase.__init__(self)
        self.qname = qname
        self.literal = literal

    def textOK(self):
        """No-op override.

        NOTE(review): presumably disables a text check done by the base
        class; confirm against validatorBase.
        """
        pass

    def setElement(self, name, attrs, parent):
        """Record the element, then run the non-literal RDF checks.

        Switches ``self.literal`` on when rdf:parseType is "Literal".
        While not literal, logs UndefinedElement for elements whose qname
        is the RSS 1.1 namespace, and DuplicateValue for an rdf:about value
        already recorded on the dispatcher.
        """
        validatorBase.setElement(self, name, attrs, parent)

        # `in` replaces the deprecated has_key(); both mappings and SAX
        # attribute objects support it.
        parse_type_key = (rdfNS, "parseType")
        if parse_type_key in attrs and attrs[parse_type_key] == "Literal":
            self.literal = True

        if self.literal:
            return

        # ensure no rss11 children
        if self.qname == rss11_ns:
            from logging import UndefinedElement
            self.log(UndefinedElement({"parent": parent.name, "element": name}))

        # no duplicate rdf:abouts
        about_key = (rdfNS, "about")
        if about_key in attrs:
            about = attrs[about_key]
            # The list of values seen so far lives directly in the
            # dispatcher's __dict__ and is created on first use.
            seen_abouts = self.dispatcher.__dict__.setdefault("abouts", [])
            if about in seen_abouts:
                self.log(DuplicateValue(
                    {"parent": parent.name, "element": "rdf:about", "value": about}))
            else:
                seen_abouts.append(about)

    def getExpectedAttrNames(self):
        """Return the attribute names treated as expected on this element.

        Literal content (or an element without attributes) accepts every
        attribute present; otherwise attributes in the RSS 1.1 namespace
        are excluded.  Returns an empty list when no attributes have been
        set at all, instead of failing on ``None.keys()``.
        """
        attrs = self.attrs
        if attrs is None:
            return []
        # no rss11 attributes
        if self.literal or not attrs:
            return attrs.keys()
        return [(ns, n) for ns, n in attrs.keys() if ns != rss11_ns]

    def validate(self):
        """Log InvalidRDF for non-literal elements mixing text and children."""
        # rdflib 2.0.5 does not catch mixed content errors
        if self.value.strip() and self.children and not self.literal:
            self.log(InvalidRDF({"message": "mixed content"}))

    def startElementNS(self, name, qname, attrs):
        """Check a child element's namespaces, then push a handler for it."""
        # ensure element is "namespace well formed"
        if ':' in name:
            from logging import MissingNamespace
            self.log(MissingNamespace({"parent": self.name, "element": name}))

        # ensure all attribute namespaces are properly defined
        for (namespace, attr) in attrs.keys():
            if ':' in attr and not namespace:
                from logging import MissingNamespace
                self.log(MissingNamespace({"parent": self.name, "element": attr}))

        # eat children: record the child and validate it with a new
        # rdfExtension that inherits the current literal flag
        self.children.append((qname, name))
        self.push(rdfExtension(qname, self.literal), name, attrs)

    def characters(self, string):
        """Forward character data to the base class unless content is literal."""
        if not self.literal:
            validatorBase.characters(self, string)
|