Teknik is a suite of services with attractive and functional interfaces. https://www.teknik.io/
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

RankingQuery.php 12KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373
  1. <?php
  2. /**
  3. * Piwik - free/libre analytics platform
  4. *
  5. * @link http://piwik.org
  6. * @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
  7. *
  8. */
  9. namespace Piwik;
  10. use Exception;
  11. /**
  12. * The ranking query class wraps an arbitrary SQL query with more SQL that limits
  13. * the number of results while aggregating the rest in a new "Others" row. It also
  14. * allows for some more fancy things that can be configured via method calls of this
  15. * class. The advanced use cases are explained in the doc comments of the methods.
  16. *
  17. * The general use case looks like this:
  18. *
  19. * // limit to 500 rows + "Others"
  20. * $rankingQuery = new RankingQuery();
  21. * $rankingQuery->setLimit(500);
  22. *
  23. * // idaction_url will be "Others" in the row that contains the aggregated rest
  24. * $rankingQuery->addLabelColumn('idaction_url');
  25. *
  26. * // the actual query. it's important to sort it before the limit is applied
  27. * $sql = 'SELECT idaction_url, COUNT(*) AS nb_hits
  28. * FROM log_link_visit_action
  29. * GROUP BY idaction_url
  30. * ORDER BY nb_hits DESC';
  31. *
  32. * // execute the query
  33. * $rankingQuery->execute($sql);
  34. *
  35. * For more examples, see RankingQueryTest.php
  36. *
  37. * @api
  38. */
  class RankingQuery
  {
      /**
       * Contains the labels of the inner query.
       * Format: "label" => true (keys are used so a label cannot appear twice)
       * @var array
       */
      private $labelColumns = array();

      /**
       * The columns of the inner query that are not labels.
       * Format: "column" => "aggregation function" or false for no aggregation
       * @var array
       */
      private $additionalColumns = array();

      /**
       * The limit for each group (the "Others" row is added on top of it).
       * @var int
       */
      private $limit = 5;

      /**
       * The name of the column that marks rows to be excluded from the limit,
       * or false when no such column has been configured.
       * @var string|bool
       */
      private $columnToMarkExcludedRows = false;

      /**
       * The column that is used to partition the result, or false when the
       * result is not partitioned.
       * @var bool|string
       */
      private $partitionColumn = false;

      /**
       * The possible values for the column $this->partitionColumn.
       * @var array
       */
      private $partitionColumnValues = array();

      /**
       * The value to use in the label of the 'Others' row.
       * @var string
       */
      private $othersLabelValue = 'Others';

      /**
       * Constructor.
       *
       * @param int|false $limit The result row limit. See {@link setLimit()}.
       *                         When false, the default limit is kept.
       */
      public function __construct($limit = false)
      {
          if ($limit !== false) {
              $this->setLimit($limit);
          }
      }

      /**
       * Set the limit after which everything is grouped to "Others".
       *
       * @param int $limit
       */
      public function setLimit($limit)
      {
          $this->limit = $limit;
      }

      /**
       * Set the value to use for the label in the 'Others' row.
       *
       * The value is escaped when it is embedded into the generated SQL, so it
       * must be passed as plain text (not pre-escaped).
       *
       * @param string $value
       */
      public function setOthersLabel($value)
      {
          $this->othersLabelValue = $value;
      }

      /**
       * Add a label column.
       * Labels are the columns that are replaced with "Others" after the limit.
       *
       * @param string|array $labelColumn A column name or an array of column names.
       */
      public function addLabelColumn($labelColumn)
      {
          if (is_array($labelColumn)) {
              foreach ($labelColumn as $label) {
                  $this->addLabelColumn($label);
              }
              return;
          }
          $this->labelColumns[$labelColumn] = true;
      }

      /**
       * Add a column that has to be added to the outer queries.
       *
       * @param string|array $column A column name or an array of column names. When an array
       *                             is given, the same aggregation function is used for each.
       * @param string|bool $aggregationFunction If set, this function is used to aggregate the values of "Others",
       *                                         eg, `'min'`, `'max'` or `'sum'`.
       */
      public function addColumn($column, $aggregationFunction = false)
      {
          if (is_array($column)) {
              foreach ($column as $c) {
                  $this->addColumn($c, $aggregationFunction);
              }
              return;
          }
          $this->additionalColumns[$column] = $aggregationFunction;
      }

      /**
       * Sets a column that will be used to filter the result into two categories.
       * Rows where this column has a value != 0 will be removed from the result and put
       * into another array. Both the result and the array of excluded rows are returned
       * by {@link execute()}.
       *
       * @param $column string Name of the column.
       * @throws Exception if method is used more than once.
       */
      public function setColumnToMarkExcludedRows($column)
      {
          if ($this->columnToMarkExcludedRows !== false) {
              throw new Exception("setColumnToMarkExcludedRows can only be used once");
          }
          $this->columnToMarkExcludedRows = $column;
          // the marker has to be selected by the outer queries as well
          $this->addColumn($this->columnToMarkExcludedRows);
      }

      /**
       * This method can be used to partition the result based on the possible values of one
       * table column. This means the query will split the result set into other sets of rows
       * for each possible value you provide (where the rows of each set have a column value
       * that equals a possible value). Each of these new sets of rows will be individually
       * limited resulting in several limited result sets.
       *
       * For example, you can run a query aggregating some data on the log_action table and
       * partition by log_action.type with the possible values of {@link Piwik\Tracker\Action::TYPE_PAGE_URL},
       * {@link Piwik\Tracker\Action::TYPE_OUTLINK}, {@link Piwik\Tracker\Action::TYPE_DOWNLOAD}.
       * The result will be three separate result sets that are aggregated the same ways, but for rows
       * where `log_action.type = TYPE_PAGE_URL`, for rows where `log_action.type = TYPE_OUTLINK` and for
       * rows where `log_action.type = TYPE_DOWNLOAD`.
       *
       * Note that the possible values are converted with intval() when the SQL is generated.
       *
       * @param $partitionColumn string The column name to partition by.
       * @param $possibleValues array Array of possible column values.
       * @throws Exception if method is used more than once.
       */
      public function partitionResultIntoMultipleGroups($partitionColumn, $possibleValues)
      {
          if ($this->partitionColumn !== false) {
              throw new Exception("partitionResultIntoMultipleGroups can only be used once");
          }
          $this->partitionColumn = $partitionColumn;
          $this->partitionColumnValues = $possibleValues;
          // the partition column has to be selected by the outer queries as well
          $this->addColumn($partitionColumn);
      }

      /**
       * Executes the query.
       * The object has to be configured first using the other methods.
       *
       * @param $innerQuery string The "payload" query that does the actual data aggregation. The ordering
       *                           has to be specified in this query. {@link RankingQuery} cannot apply ordering
       *                           itself.
       * @param $bind array Bindings for the inner query.
       * @return array The format depends on which methods have been used to configure the ranking query:
       *               - by default, the list of rows returned by the database;
       *               - with {@link setColumnToMarkExcludedRows()}, an array with the keys
       *                 'result' and 'excludedFromLimit';
       *               - with {@link partitionResultIntoMultipleGroups()}, the rows (or the 'result'
       *                 entry) are grouped into an array indexed by the partition column value.
       * @throws Exception if no label column has been configured.
       */
      public function execute($innerQuery, $bind = array())
      {
          $query = $this->generateQuery($innerQuery);
          $data = Db::fetchAll($query, $bind);

          if ($this->columnToMarkExcludedRows !== false) {
              // split the result into the regular result and the rows with special treatment
              $excludedFromLimit = array();
              $result = array();
              foreach ($data as $row) {
                  // loose comparison on purpose: the database may return the marker as a string
                  if ($row[$this->columnToMarkExcludedRows] != 0) {
                      $excludedFromLimit[] = $row;
                  } else {
                      $result[] = $row;
                  }
              }
              // plain values, no references: the returned array must not alias local variables
              $data = array(
                  'result'            => $result,
                  'excludedFromLimit' => $excludedFromLimit
              );
          }

          if ($this->partitionColumn !== false) {
              if ($this->columnToMarkExcludedRows !== false) {
                  $data['result'] = $this->splitPartitions($data['result']);
              } else {
                  $data = $this->splitPartitions($data);
              }
          }

          return $data;
      }

      /**
       * Groups rows by the value they have in the partition column.
       *
       * @param array $data List of rows, each containing the partition column.
       * @return array Format: partition value => list of rows
       */
      private function splitPartitions($data)
      {
          $result = array();
          foreach ($data as $row) {
              $result[$row[$this->partitionColumn]][] = $row;
          }
          return $result;
      }

      /**
       * Generate the SQL code that does the magic.
       * If you want to get the result, use execute() instead. If you want to run the query
       * yourself, use this method.
       *
       * @param $innerQuery string The "payload" query that does the actual data aggregation. The ordering
       *                           has to be specified in this query. {@link RankingQuery} cannot apply ordering
       *                           itself.
       * @return string The entire ranking query SQL.
       * @throws Exception if no label column has been configured.
       */
      public function generateQuery($innerQuery)
      {
          if (empty($this->labelColumns)) {
              // without a label the generated SELECT list would be syntactically invalid
              throw new Exception("RankingQuery requires at least one label column, use addLabelColumn() first");
          }

          // +1 to include "Others"; the cast guarantees that only an integer ends up in the SQL
          $limit = ((int) $this->limit) + 1;
          $counterExpression = $this->getCounterExpression($limit);

          // generate select clauses for label columns
          $labelNames = array_keys($this->labelColumns);
          $labelColumnsString = '`' . implode('`, `', $labelNames) . '`';
          $othersLabel = $this->getQuotedOthersLabel();
          $labelColumnsOthersSwitch = array();
          foreach ($labelNames as $column) {
              $labelColumnsOthersSwitch[] = "
                  CASE
                      WHEN counter = $limit THEN $othersLabel
                      ELSE `$column`
                  END AS `$column`
              ";
          }
          $labelColumnsOthersSwitch = implode(', ', $labelColumnsOthersSwitch);

          // generate select clauses for additional columns
          $additionalColumnsString = '';
          $additionalColumnsAggregatedString = '';
          foreach ($this->additionalColumns as $additionalColumn => $aggregation) {
              $additionalColumnsString .= ', `' . $additionalColumn . '`';
              if ($aggregation !== false) {
                  $additionalColumnsAggregatedString .= ', ' . $aggregation . '(`' . $additionalColumn . '`) AS `' . $additionalColumn . '`';
              } else {
                  $additionalColumnsAggregatedString .= ', `' . $additionalColumn . '`';
              }
          }

          // initialize the counters
          if ($this->partitionColumn !== false) {
              // one counter variable per possible partition value
              $initCounter = '';
              foreach ($this->partitionColumnValues as $value) {
                  $initCounter .= '( SELECT @counter' . intval($value) . ':=0 ) initCounter' . intval($value) . ', ';
              }
          } else {
              $initCounter = '( SELECT @counter:=0 ) initCounter,';
          }

          // add a counter to the query
          // we rely on the sorting of the inner query
          $withCounter = "
              SELECT
                  $labelColumnsString,
                  $counterExpression AS counter
                  $additionalColumnsString
              FROM
                  $initCounter
                  ( $innerQuery ) actualQuery
          ";

          // group by the counter - this groups "Others" because the counter stops at $limit
          $groupBy = 'counter';
          if ($this->partitionColumn !== false) {
              $groupBy .= ', `' . $this->partitionColumn . '`';
          }

          $groupOthers = "
              SELECT
                  $labelColumnsOthersSwitch
                  $additionalColumnsAggregatedString
              FROM ( $withCounter ) AS withCounter
              GROUP BY $groupBy
          ";

          return $groupOthers;
      }

      /**
       * Returns the 'Others' label as a single-quoted SQL string literal.
       *
       * Single quotes are doubled and backslashes are doubled so the label cannot
       * terminate the literal early.
       * NOTE(review): doubling backslashes assumes the connection does not run with
       * the NO_BACKSLASH_ESCAPES SQL mode - confirm for your setup.
       *
       * @return string
       */
      private function getQuotedOthersLabel()
      {
          $escaped = str_replace(
              array('\\', "'"),
              array('\\\\', "''"),
              $this->othersLabelValue
          );
          return "'" . $escaped . "'";
      }

      /**
       * Builds the SQL CASE expression that assigns a counter value to each row of the inner query.
       *
       * @param int $limit The counter value at which counting stops (row limit + 1 for "Others").
       * @return string
       */
      private function getCounterExpression($limit)
      {
          $whens = array();

          if ($this->columnToMarkExcludedRows !== false) {
              // when a row has been specified that marks which records should be excluded
              // from limiting, we don't give those rows the normal counter but -1 times the
              // value they had before. this way, they have a separate number space (i.e. negative
              // integers).
              $whens[] = "WHEN {$this->columnToMarkExcludedRows} != 0 THEN -1 * {$this->columnToMarkExcludedRows}";
          }

          if ($this->partitionColumn !== false) {
              // partition: one counter per possible value
              foreach ($this->partitionColumnValues as $value) {
                  $isValue = '`' . $this->partitionColumn . '` = ' . intval($value);
                  $counter = '@counter' . intval($value);
                  $whens[] = "WHEN $isValue AND $counter = $limit THEN $limit";
                  $whens[] = "WHEN $isValue THEN $counter:=$counter+1";
              }
              // rows whose partition value is not one of the possible values
              $whens[] = "ELSE 0";
          } else {
              // no partitioning: add a single counter
              $whens[] = "WHEN @counter = $limit THEN $limit";
              $whens[] = "ELSE @counter:=@counter+1";
          }

          return "
              CASE
                  " . implode("
                  ", $whens) . "
              END
          ";
      }
  }