Thrill  0.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
page_rank.hpp
Go to the documentation of this file.
1 /*******************************************************************************
2  * examples/page_rank/page_rank.hpp
3  *
4  * Part of Project Thrill - http://project-thrill.org
5  *
6  * Copyright (C) 2016 Timo Bingmann <[email protected]>
7  * Copyright (C) 2016 Alexander Noe <[email protected]>
8  *
9  * All rights reserved. Published under the BSD-2 license in the LICENSE file.
10  ******************************************************************************/
11 
12 #pragma once
13 #ifndef THRILL_EXAMPLES_PAGE_RANK_PAGE_RANK_HEADER
14 #define THRILL_EXAMPLES_PAGE_RANK_PAGE_RANK_HEADER
15 
16 #include <thrill/api/collapse.hpp>
17 #include <thrill/api/generate.hpp>
19 #include <thrill/api/print.hpp>
22 #include <thrill/api/size.hpp>
23 #include <thrill/api/zip.hpp>
24 #include <thrill/common/logger.hpp>
25 
26 #include <algorithm>
27 #include <string>
28 #include <utility>
29 #include <vector>
30 
31 namespace examples {
32 namespace page_rank {
33 
34 using namespace thrill; // NOLINT
35 
36 static constexpr bool debug = false;
37 
38 static constexpr double dampening = 0.85;
39 
40 using PageId = std::size_t;
41 using Rank = double;
42 
43 //! A pair (page source, page target)
44 struct PagePageLink {
45  PageId src, tgt;
46 
47  friend std::ostream& operator << (std::ostream& os, const PagePageLink& a) {
48  return os << '(' << a.src << '>' << a.tgt << ')';
49  }
51 
52 //! A pair (page, rank)
53 struct PageRankPair {
56 
57  friend std::ostream& operator << (std::ostream& os, const PageRankPair& a) {
58  return os << '(' << a.page << '|' << a.rank << ')';
59  }
61 
62 using PageRankStdPair = std::pair<PageId, Rank>;
63 using OutgoingLinks = std::vector<PageId>;
64 using OutgoingLinksRank = std::pair<std::vector<PageId>, Rank>;
65 using LinkedPage = std::pair<PageId, OutgoingLinks>;
66 using RankedPage = std::pair<PageId, Rank>;
67 
68 template <typename InStack>
69 auto PageRank(const DIA<OutgoingLinks, InStack>&links,
70  size_t num_pages, size_t iterations) {
71 
72  api::Context& ctx = links.context();
73  double num_pages_d = static_cast<double>(num_pages);
74 
75  // initialize all ranks to 1.0 / n: (url, rank)
76 
77  DIA<Rank> ranks =
78  Generate(
79  ctx, num_pages,
80  [num_pages_d](size_t) { return Rank(1.0) / num_pages_d; })
81  .Collapse();
82 
83  // do iterations
84  for (size_t iter = 0; iter < iterations; ++iter) {
85 
86  // for all outgoing link, get their rank contribution from all
87  // links by doing:
88  //
89  // 1) group all outgoing links with rank of its parent page: (Zip)
90  // ([linked_url, linked_url, ...], rank_parent)
91  //
92  // 2) compute rank contribution for each linked_url: (FlatMap)
93  // (linked_url, rank / outgoing.size)
94 
95  auto outs_rank = links.Zip(
96  ranks,
97  [](const OutgoingLinks& ol, const Rank& r) {
98  return OutgoingLinksRank(ol, r);
99  });
100 
101  if (debug) {
102  outs_rank
103  .Map([](const OutgoingLinksRank& ol) {
104  return common::Join(',', ol.first)
105  + " <- " + std::to_string(ol.second);
106  })
107  .Print("outs_rank");
108  }
109 
110  auto contribs = outs_rank.template FlatMap<PageRankPair>(
111  [](const OutgoingLinksRank& p, auto emit) {
112  if (p.first.size() > 0) {
113  Rank rank_contrib = p.second / static_cast<double>(p.first.size());
114  for (const PageId& tgt : p.first)
115  emit(PageRankPair { tgt, rank_contrib });
116  }
117  });
118 
119  // reduce all rank contributions by adding all rank contributions and
120  // compute the new rank: (url, rank)
121 
122  ranks =
123  contribs
124  .ReduceToIndex(
125  [](const PageRankPair& p) { return p.page; },
126  [](const PageRankPair& p1, const PageRankPair& p2) {
127  return PageRankPair { p1.page, p1.rank + p2.rank };
128  }, num_pages)
129  .Map([num_pages_d](const PageRankPair& p) {
130  return dampening * p.rank + (1 - dampening) / num_pages_d;
131  })
132  .Collapse();
133  }
134 
135  return ranks;
136 }
137 
138 template <const bool UseLocationDetection = false, typename InStack>
139 auto PageRankJoin(const DIA<LinkedPage, InStack>&links, size_t num_pages,
140  size_t iterations) {
141 
142  api::Context& ctx = links.context();
143  double num_pages_d = static_cast<double>(num_pages);
144 
145  // initialize all ranks to 1.0 / n: (url, rank)
146 
147  DIA<RankedPage> ranks =
148  Generate(
149  ctx, num_pages,
150  [num_pages_d](size_t idx) {
151  return std::make_pair(idx, Rank(1.0) / num_pages_d);
152  })
153  .Collapse();
154 
155  // do iterations
156  for (size_t iter = 0; iter < iterations; ++iter) {
157 
158  // for all outgoing link, get their rank contribution from all
159  // links by doing:
160  //
161  // 1) group all outgoing links with rank of its parent page: (Zip)
162  // ([linked_url, linked_url, ...], rank_parent)
163  //
164  // 2) compute rank contribution for each linked_url: (FlatMap)
165  // (linked_url, rank / outgoing.size)
166 
167  auto outs_rank = InnerJoin(
168  LocationDetectionFlag<UseLocationDetection>(),
169  links, ranks,
170  [](const LinkedPage& lp) { return lp.first; },
171  [](const RankedPage& rp) { return rp.first; },
172  [](const LinkedPage& lp, const RankedPage& rp) {
173  return std::make_pair(lp.second, rp.second);
174  });
175 
176  if (debug) {
177  outs_rank
178  .Map([](const OutgoingLinksRank& ol) {
179  return common::Join(',', ol.first)
180  + " <- " + std::to_string(ol.second);
181  })
182  .Print("outs_rank");
183  }
184 
185  auto contribs = outs_rank.template FlatMap<PageRankStdPair>(
186  [](const OutgoingLinksRank& p, auto emit) {
187  if (p.first.size() > 0) {
188  Rank rank_contrib = p.second / static_cast<double>(p.first.size());
189  for (const PageId& tgt : p.first)
190  emit(std::make_pair(tgt, rank_contrib));
191  }
192  });
193 
194  // reduce all rank contributions by adding all rank contributions and
195  // compute the new rank: (url, rank)
196 
197  ranks =
198  contribs
199  .ReducePair(
200  [](const Rank& p1, const Rank& p2) {
201  return p1 + p2;
202  })
203  .Map([num_pages_d](const PageRankStdPair& p) {
204  return std::make_pair(
205  p.first,
206  dampening * p.second + (1 - dampening) / num_pages_d);
207  }).Collapse();
208 
209  ranks.Execute();
210  }
211 
212  return ranks;
213 }
214 
215 } // namespace page_rank
216 } // namespace examples
217 
218 #endif // !THRILL_EXAMPLES_PAGE_RANK_PAGE_RANK_HEADER
219 
220 /******************************************************************************/