void-packages/srcpkgs/python3-syndom/patches/add-author-extraction-for-f...

104 lines
4.1 KiB
Diff
Raw Normal View History

2023-10-06 15:31:45 +02:00
From: Gabriele Musco <gabmus@disroot.org>
Date: Sat, 25 Mar 2023 14:13:44 +0100
Subject: add author extraction for feed item
(cherry picked from commit 75920321062d682437f3fb0319dad227d8b18f6c)
---
src/feed_item.cpp | 12 +++++++++---
src/feed_item.hpp | 14 ++++++++++++++
src/pybind.cpp | 4 ++++
3 files changed, 27 insertions(+), 3 deletions(-)
diff --git a/src/feed_item.cpp b/src/feed_item.cpp
index a08cd71..e0dbb8c 100644
--- a/src/feed_item.cpp
+++ b/src/feed_item.cpp
@@ -1,5 +1,5 @@
-#include "feed_item.hpp"
-#include "utils.hpp"
+#include "./feed_item.hpp"
+#include "./utils.hpp"
std::string FeedItem::extract_url() {
std::string res = item_node.child("link").text().as_string();
@@ -75,6 +75,10 @@ void FeedItem::parse() {
// pub_date
pub_date = SynDomUtils::extract_from_node(item_node, __PUB_DATE_PARAMS);
+ // author
+ author_name = SynDomUtils::extract_from_node(item_node, __AUTHOR_NAME_PARAMS);
+ author_url = SynDomUtils::extract_from_node(item_node, __AUTHOR_URL_PARAMS);
+
// img_url
img_url = extract_img_url();
fix_url(img_url);
@@ -87,6 +91,8 @@ std::string FeedItem::to_json() {
" \"url\": \"" + url + "\",\n"
" \"media_url\": \"" + media_url + "\",\n"
" \"pub_date\": \"" + pub_date + "\",\n"
- " \"img_url\": \"" + img_url + "\"\n"
+ " \"img_url\": \"" + img_url + "\",\n"
+ " \"author_name\": \"" + author_name + "\",\n"
+ " \"author_url\": \"" + author_url + "\"\n"
" }";
}
diff --git a/src/feed_item.hpp b/src/feed_item.hpp
index 5d7105e..fd1259f 100644
--- a/src/feed_item.hpp
+++ b/src/feed_item.hpp
@@ -28,6 +28,8 @@ private:
std::string media_url;
std::string pub_date;
std::string img_url;
+ std::string author_name;
+ std::string author_url;
/**
* Tries to extract the item url and returns it.
@@ -73,6 +75,16 @@ private:
{ExtractionParam::ParamType::CHILD, {"date"}},
{ExtractionParam::ParamType::CHILD, {"dc:date"}}
};
+ static inline const std::vector<ExtractionParam> __AUTHOR_NAME_PARAMS{
+ {ExtractionParam::ParamType::CHILD, {"author", "name"}},
+ {ExtractionParam::ParamType::CHILD, {"author"}},
+ {ExtractionParam::ParamType::CHILD, {"dc:creator"}},
+ {ExtractionParam::ParamType::CHILD, {"creator"}},
+ {ExtractionParam::ParamType::CHILD, {"itunes:author"}},
+ };
+ static inline const std::vector<ExtractionParam> __AUTHOR_URL_PARAMS{
+ {ExtractionParam::ParamType::CHILD, {"author", "uri"}}
+ };
/**
* Entry point of the class, parses all the relevant content. Called by
* the constructor.
@@ -101,6 +113,8 @@ public:
std::string get_media_url() { return media_url; }
std::string get_pub_date() { return pub_date; }
std::string get_img_url() { return img_url; }
+ std::string get_author_name() { return author_name; }
+ std::string get_author_url() { return author_url; }
/**
* Represents the FeedItem object (itself) as a json, returned as a string.
diff --git a/src/pybind.cpp b/src/pybind.cpp
index bef72f9..1d5a58d 100644
--- a/src/pybind.cpp
+++ b/src/pybind.cpp
@@ -19,11 +19,15 @@ PYBIND11_MODULE(syndom, m) {
.def_property_readonly("media_url", &FeedItem::get_media_url)
.def_property_readonly("pub_date", &FeedItem::get_pub_date)
.def_property_readonly("img_url", &FeedItem::get_img_url)
+ .def_property_readonly("author_name", &FeedItem::get_author_name)
+ .def_property_readonly("author_url", &FeedItem::get_author_url)
.def("get_title", &FeedItem::get_title)
.def("get_content", &FeedItem::get_content)
.def("get_url", &FeedItem::get_url)
.def("get_media_url", &FeedItem::get_media_url)
.def("get_pub_date", &FeedItem::get_pub_date)
+ .def("get_author_name", &FeedItem::get_author_name)
+ .def("get_author_url", &FeedItem::get_author_url)
.def("get_img_url", &FeedItem::get_img_url);
py::class_<Feed>(m, "Feed")
.def(py::init<std::string>())