104 lines
4.1 KiB
Diff
104 lines
4.1 KiB
Diff
|
From: Gabriele Musco <gabmus@disroot.org>
|
||
|
Date: Sat, 25 Mar 2023 14:13:44 +0100
|
||
|
Subject: add author extraction for feed item
|
||
|
|
||
|
(cherry picked from commit 75920321062d682437f3fb0319dad227d8b18f6c)
|
||
|
---
|
||
|
src/feed_item.cpp | 12 +++++++++---
|
||
|
src/feed_item.hpp | 14 ++++++++++++++
|
||
|
src/pybind.cpp | 4 ++++
|
||
|
3 files changed, 27 insertions(+), 3 deletions(-)
|
||
|
|
||
|
diff --git a/src/feed_item.cpp b/src/feed_item.cpp
|
||
|
index a08cd71..e0dbb8c 100644
|
||
|
--- a/src/feed_item.cpp
|
||
|
+++ b/src/feed_item.cpp
|
||
|
@@ -1,5 +1,5 @@
|
||
|
-#include "feed_item.hpp"
|
||
|
-#include "utils.hpp"
|
||
|
+#include "./feed_item.hpp"
|
||
|
+#include "./utils.hpp"
|
||
|
|
||
|
std::string FeedItem::extract_url() {
|
||
|
std::string res = item_node.child("link").text().as_string();
|
||
|
@@ -75,6 +75,10 @@ void FeedItem::parse() {
|
||
|
// pub_date
|
||
|
pub_date = SynDomUtils::extract_from_node(item_node, __PUB_DATE_PARAMS);
|
||
|
|
||
|
+ // author
|
||
|
+ author_name = SynDomUtils::extract_from_node(item_node, __AUTHOR_NAME_PARAMS);
|
||
|
+ author_url = SynDomUtils::extract_from_node(item_node, __AUTHOR_URL_PARAMS);
|
||
|
+
|
||
|
// img_url
|
||
|
img_url = extract_img_url();
|
||
|
fix_url(img_url);
|
||
|
@@ -87,6 +91,8 @@ std::string FeedItem::to_json() {
|
||
|
" \"url\": \"" + url + "\",\n"
|
||
|
" \"media_url\": \"" + media_url + "\",\n"
|
||
|
" \"pub_date\": \"" + pub_date + "\",\n"
|
||
|
- " \"img_url\": \"" + img_url + "\"\n"
|
||
|
+ " \"img_url\": \"" + img_url + "\",\n"
|
||
|
+ " \"author_name\": \"" + author_name + "\",\n"
|
||
|
+ " \"author_url\": \"" + author_url + "\"\n"
|
||
|
" }";
|
||
|
}
|
||
|
diff --git a/src/feed_item.hpp b/src/feed_item.hpp
|
||
|
index 5d7105e..fd1259f 100644
|
||
|
--- a/src/feed_item.hpp
|
||
|
+++ b/src/feed_item.hpp
|
||
|
@@ -28,6 +28,8 @@ private:
|
||
|
std::string media_url;
|
||
|
std::string pub_date;
|
||
|
std::string img_url;
|
||
|
+ std::string author_name;
|
||
|
+ std::string author_url;
|
||
|
|
||
|
/**
|
||
|
* Tries to extract the item url and returns it.
|
||
|
@@ -73,6 +75,16 @@ private:
|
||
|
{ExtractionParam::ParamType::CHILD, {"date"}},
|
||
|
{ExtractionParam::ParamType::CHILD, {"dc:date"}}
|
||
|
};
|
||
|
+ static inline const std::vector<ExtractionParam> __AUTHOR_NAME_PARAMS{
|
||
|
+ {ExtractionParam::ParamType::CHILD, {"author", "name"}},
|
||
|
+ {ExtractionParam::ParamType::CHILD, {"author"}},
|
||
|
+ {ExtractionParam::ParamType::CHILD, {"dc:creator"}},
|
||
|
+ {ExtractionParam::ParamType::CHILD, {"creator"}},
|
||
|
+ {ExtractionParam::ParamType::CHILD, {"itunes:author"}},
|
||
|
+ };
|
||
|
+ static inline const std::vector<ExtractionParam> __AUTHOR_URL_PARAMS{
|
||
|
+ {ExtractionParam::ParamType::CHILD, {"author", "uri"}}
|
||
|
+ };
|
||
|
/**
|
||
|
* Entry point of the class, parses all the relevant content. Called by
|
||
|
* the constructor.
|
||
|
@@ -101,6 +113,8 @@ public:
|
||
|
std::string get_media_url() { return media_url; }
|
||
|
std::string get_pub_date() { return pub_date; }
|
||
|
std::string get_img_url() { return img_url; }
|
||
|
+ std::string get_author_name() { return author_name; }
|
||
|
+ std::string get_author_url() { return author_url; }
|
||
|
|
||
|
/**
|
||
|
* Represents the FeedItem object (itself) as a json, returned as a string.
|
||
|
diff --git a/src/pybind.cpp b/src/pybind.cpp
|
||
|
index bef72f9..1d5a58d 100644
|
||
|
--- a/src/pybind.cpp
|
||
|
+++ b/src/pybind.cpp
|
||
|
@@ -19,11 +19,15 @@ PYBIND11_MODULE(syndom, m) {
|
||
|
.def_property_readonly("media_url", &FeedItem::get_media_url)
|
||
|
.def_property_readonly("pub_date", &FeedItem::get_pub_date)
|
||
|
.def_property_readonly("img_url", &FeedItem::get_img_url)
|
||
|
+ .def_property_readonly("author_name", &FeedItem::get_author_name)
|
||
|
+ .def_property_readonly("author_url", &FeedItem::get_author_url)
|
||
|
.def("get_title", &FeedItem::get_title)
|
||
|
.def("get_content", &FeedItem::get_content)
|
||
|
.def("get_url", &FeedItem::get_url)
|
||
|
.def("get_media_url", &FeedItem::get_media_url)
|
||
|
.def("get_pub_date", &FeedItem::get_pub_date)
|
||
|
+ .def("get_author_name", &FeedItem::get_author_name)
|
||
|
+ .def("get_author_url", &FeedItem::get_author_url)
|
||
|
.def("get_img_url", &FeedItem::get_img_url);
|
||
|
py::class_<Feed>(m, "Feed")
|
||
|
.def(py::init<std::string>())
|