summaryrefslogtreecommitdiffstats
path: root/share/extensions/other/clipart/sources/ocal.py
diff options
context:
space:
mode:
Diffstat (limited to 'share/extensions/other/clipart/sources/ocal.py')
-rw-r--r--share/extensions/other/clipart/sources/ocal.py97
1 files changed, 97 insertions, 0 deletions
diff --git a/share/extensions/other/clipart/sources/ocal.py b/share/extensions/other/clipart/sources/ocal.py
new file mode 100644
index 0000000..2fe06db
--- /dev/null
+++ b/share/extensions/other/clipart/sources/ocal.py
@@ -0,0 +1,97 @@
+#
+# Copyright 2021 Martin Owens <doctormo@gmail.com>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>
+#
+
+import sys
+import json
+import logging
+
+from import_sources import RemoteSource, RemoteFile
+from urllib.parse import urljoin, parse_qs
+
+try:
+ from bs4 import BeautifulSoup
+except ImportError:
+ BeautifulSoup = None
+
+
+class OpenClipartFile(RemoteFile):
+    """Search result whose real image URL has to be scraped from its page."""
+
+    def get_file(self):
+        """Resolve the detail page in ``self.info["file"]`` to the image itself.
+
+        The search results only carry a link to an HTML detail page. That
+        page is fetched and each ``<script>`` element is tried as a JSON
+        document containing ``image.url``; the first URL found is passed to
+        ``self.remote.to_local_file``.
+
+        Returns:
+            The result of ``self.remote.to_local_file`` for the first usable
+            URL, or ``None`` (after logging an error) when no script element
+            produced one.
+        """
+        response = self.remote.session.get(self.info["file"])
+        soup = BeautifulSoup(response.text, features="lxml")
+        for script in soup.find_all("script"):
+            content = script.contents
+            # Cheap pre-filter: skip empty scripts and ones that never
+            # mention "image" before attempting a JSON parse.
+            if not content or "image" not in content[0]:
+                continue
+            try:
+                url = json.loads(content[0])["image"]["url"]
+            except (ValueError, KeyError, TypeError):
+                # Plain javascript, or JSON with a different shape.
+                continue
+            try:
+                return self.remote.to_local_file(url)
+            except Exception:
+                # Stay best-effort: a later script may still hold a working
+                # URL, but keep a trace of why this one was rejected.
+                logging.debug("Couldn't fetch %s", url, exc_info=True)
+        logging.error("Couldn't load svg from %s", self.info["file"])
+        return None
+
+
+class OpenClipart(RemoteSource):
+    """Clipart source backed by the HTML search pages of openclipart.org."""
+
+    name = "Open Clipart Library"
+    icon = "sources/ocal.svg"
+    base_url = "https://openclipart.org/search/"
+    # The scraper cannot run without BeautifulSoup, so the source is only
+    # enabled when the optional import at the top of the file succeeded.
+    is_enabled = BeautifulSoup is not None
+    file_cls = OpenClipartFile
+
+    def html_search(self, response):
+        """Yield the results, then the next-page callables, of a result page.
+
+        Args:
+            response: Response of a search request; only ``response.text``
+                is read.
+
+        Yields:
+            One dict per artwork (keys ``file``, ``name``, ``thumbnail``,
+            ``author``, ``license``), followed by one zero-argument callable
+            per "Next" pagination link. Calling it runs ``_search`` with the
+            query parameters of that link.
+        """
+        soup = BeautifulSoup(response.text, features="lxml")
+        for div in soup.find_all("div", {"class": "artwork"}):
+            if div.a and div.a.img:
+                link = urljoin(self.base_url, div.a.get("href"))
+                img = urljoin(self.base_url, div.a.img.get("src"))
+
+                yield {
+                    "file": link,  # Detail page; OpenClipartFile resolves it
+                    "name": div.a.img.get("alt"),
+                    "thumbnail": img,
+                    "author": "OpenClipart",
+                    "license": "cc-0",
+                }
+
+        for page in soup.find_all("a", {"class": "page-link", "aria-label": "Next"}):
+            href = page.get("href", "")
+            if "=" in href:
+                # Parse now and bind through a default argument: the callable
+                # is invoked after this loop has finished, so closing over
+                # ``page`` would make every callable use the last link.
+                params = parse_qs(href.split("?")[-1])
+                yield lambda params=params: self._search(**params)
+
+    def search(self, query):
+        """Search for ``query`` by scraping the html result pages."""
+        return self._search(query=query)
+
+    def _search(self, **params):
+        """Request one result page and collect what ``html_search`` yields.
+
+        Args:
+            **params: Query-string parameters sent to ``base_url``.
+
+        Returns:
+            A list of result dicts, with the next-page callable appended as
+            the last element when the page links to a following page. An
+            empty list is returned when the request itself fails.
+        """
+        try:
+            response = self.session.get(self.base_url, params=params)
+        except Exception as err:
+            # Best-effort: a failed request means "no results", but say why.
+            logging.warning("Search request to %s failed: %s", self.base_url, err)
+            return []
+
+        items = []
+        next_page = None
+        for item in self.html_search(response):
+            if callable(item):
+                next_page = item
+            else:
+                items.append(item)
+        # Often ocal will have empty pages, weirdly. Skip ahead to the next
+        # page (one nested call per consecutive empty page).
+        if not items and next_page:
+            return next_page()
+        # Non-empty page, return all
+        if next_page:
+            items.append(next_page)
+        return items