Teknik is a suite of services with attractive and functional interfaces. https://www.teknik.io/
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

ArchiveProcessor.php 19KB


  1. <?php
  2. /**
  3. * Piwik - free/libre analytics platform
  4. *
  5. * @link http://piwik.org
  6. * @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
  7. *
  8. */
  9. namespace Piwik;
  10. use Exception;
  11. use Piwik\ArchiveProcessor\Parameters;
  12. use Piwik\ArchiveProcessor\Rules;
  13. use Piwik\DataAccess\ArchiveWriter;
  14. use Piwik\DataAccess\LogAggregator;
  15. use Piwik\DataTable\Manager;
  16. use Piwik\DataTable\Map;
  17. use Piwik\DataTable\Row;
  18. use Piwik\Db;
  19. use Piwik\Period;
  20. /**
  21. * Used by {@link Piwik\Plugin\Archiver} instances to insert and aggregate archive data.
  22. *
  23. * ### See also
  24. *
  25. * - **{@link Piwik\Plugin\Archiver}** - to learn how plugins should implement their own analytics
  26. * aggregation logic.
  27. * - **{@link Piwik\DataAccess\LogAggregator}** - to learn how plugins can perform data aggregation
  28. * across Piwik's log tables.
  29. *
  30. * ### Examples
  31. *
  32. * **Inserting numeric data**
  33. *
  34. * // function in an Archiver descendant
  35. * public function aggregateDayReport()
  36. * {
  37. * $archiveProcessor = $this->getProcessor();
  38. *
  39. * $myFancyMetric = // ... calculate the metric value ...
  40. * $archiveProcessor->insertNumericRecord('MyPlugin_myFancyMetric', $myFancyMetric);
  41. * }
  42. *
  43. * **Inserting serialized DataTables**
  44. *
  45. * // function in an Archiver descendant
  46. * public function aggregateDayReport()
  47. * {
  48. * $archiveProcessor = $this->getProcessor();
  49. *
  50. * $maxRowsInTable = Config::getInstance()->General['datatable_archiving_maximum_rows_standard'];
  51. *
  52. * $dataTable = // ... build by aggregating visits ...
  53. * $serializedData = $dataTable->getSerialized($maxRowsInTable, $maxRowsInSubtable = $maxRowsInTable,
  54. * $columnToSortBy = Metrics::INDEX_NB_VISITS);
  55. *
  56. * $archiveProcessor->insertBlobRecords('MyPlugin_myFancyReport', $serializedData);
  57. * }
  58. *
  59. * **Aggregating archive data**
  60. *
  61. * // function in Archiver descendant
  62. * public function aggregateMultipleReports()
  63. * {
  64. * $archiveProcessor = $this->getProcessor();
  65. *
  66. * // aggregate a metric
  67. * $archiveProcessor->aggregateNumericMetrics('MyPlugin_myFancyMetric');
  68. * $archiveProcessor->aggregateNumericMetrics('MyPlugin_mySuperFancyMetric', 'max');
  69. *
  70. * // aggregate a report
  71. * $archiveProcessor->aggregateDataTableRecords('MyPlugin_myFancyReport');
  72. * }
  73. *
  74. */
  75. class ArchiveProcessor
  76. {
  /**
   * Writer handed to the constructor; every numeric and blob record inserted by
   * this processor goes through it.
   *
   * @var \Piwik\DataAccess\ArchiveWriter
   */
  protected $archiveWriter;
  /**
   * Log aggregator created in the constructor from the same Parameters object.
   *
   * @var \Piwik\DataAccess\LogAggregator
   */
  protected $logAggregator;
  /**
   * Archive over the sub-periods of the current period. Stays empty until
   * getArchive() builds it on first use.
   *
   * @var Archive
   */
  public $archive = null;
  /**
   * Site, period and segment this processor archives data for.
   *
   * @var Parameters
   */
  protected $params;
  /**
   * Number of visits, or `false` until setNumberOfVisits() has been called
   * (getNumberOfVisits() throws while the value is still `false`).
   *
   * @var int|bool
   */
  protected $numberOfVisits = false;
  /**
   * Number of converted visits, or `false` until setNumberOfVisits() has been called.
   *
   * @var int|bool
   */
  protected $numberOfVisitsConverted = false;
  /**
   * If true, unique visitors are not calculated when we are aggregating data for multiple sites.
   * The `[General] enable_processing_unique_visitors_multiple_sites` INI config option controls
   * the value of this variable.
   *
   * The declared default is overwritten in the constructor with the value returned by
   * Rules::shouldSkipUniqueVisitorsCalculationForMultipleSites().
   *
   * @var bool
   */
  private $skipUniqueVisitorsCalculationForMultipleSites = true;
  /**
   * Name of the INI option mentioned above. The constant is not read anywhere in
   * this class; the constructor obtains the effective value through Rules.
   */
  const SKIP_UNIQUE_VISITORS_FOR_MULTIPLE_SITES = 'enable_processing_unique_visitors_multiple_sites';
  /**
   * Creates a processor bound to the given archiving parameters and writer.
   *
   * A LogAggregator is created from `$params`, and the flag that controls whether
   * unique visitors are skipped for multiple sites is read from Rules.
   *
   * @param Parameters $params Site, period and segment to archive data for.
   * @param ArchiveWriter $archiveWriter Writer that receives the inserted records.
   */
  public function __construct(Parameters $params, ArchiveWriter $archiveWriter)
  {
      $this->archiveWriter = $archiveWriter;
      $this->params = $params;

      $aggregator = new LogAggregator($params);
      $this->logAggregator = $aggregator;

      $skipUniques = Rules::shouldSkipUniqueVisitorsCalculationForMultipleSites();
      $this->skipUniqueVisitorsCalculationForMultipleSites = $skipUniques;
  }
  /**
   * Returns the Archive used to read the sub-period data, building it on first use.
   *
   * The archive is created from the segment, sub-periods and site ids of the
   * current parameters and then kept in `$this->archive` for later calls.
   *
   * @return Archive
   */
  protected function getArchive()
  {
      if (!empty($this->archive)) {
          return $this->archive;
      }

      $parameters = $this->params;
      $periods = $parameters->getSubPeriods();
      $sites = $parameters->getIdSites();
      $segment = $parameters->getSegment();

      $this->archive = Archive::factory($segment, $periods, $sites);

      return $this->archive;
  }
  /**
   * Stores the visit counts that getNumberOfVisits() and
   * getNumberOfVisitsConverted() hand back later.
   *
   * @param int $visits Number of visits.
   * @param int $visitsConverted Number of converted visits.
   */
  public function setNumberOfVisits($visits, $visitsConverted)
  {
      $this->numberOfVisitsConverted = $visitsConverted;
      $this->numberOfVisits = $visits;
  }
  /**
   * Gives access to the {@link Parameters} object (site, period and segment)
   * this processor was constructed with.
   *
   * @return Parameters
   * @api
   */
  public function getParams()
  {
      $parameters = $this->params;

      return $parameters;
  }
  /**
   * Gives access to the `{@link Piwik\DataAccess\LogAggregator}` that was created in the
   * constructor from this processor's parameters.
   *
   * @return LogAggregator
   * @api
   */
  public function getLogAggregator()
  {
      $aggregator = $this->logAggregator;

      return $aggregator;
  }
  /**
   * Default renaming map used by renameColumnsAfterAggregation() when the caller
   * passes no map of its own.
   *
   * Array of (column name before => column name renamed) of the columns for which
   * the sum operation is invalid. These columns are renamed as per this mapping.
   *
   * @var array
   */
  protected static $columnsToRenameAfterAggregation = array(
      Metrics::INDEX_NB_UNIQ_VISITORS => Metrics::INDEX_SUM_DAILY_NB_UNIQ_VISITORS,
      Metrics::INDEX_NB_USERS => Metrics::INDEX_SUM_DAILY_NB_USERS,
  );
  /**
   * Aggregates each named report over the sub-periods of the current period and stores
   * the serialized result as the blob record for this period.
   *
   * For every record name the aggregated table is built, its row counts are noted, the
   * table is serialized (with the given truncation settings) and inserted, and every
   * DataTable registered with the Manager since the start of the iteration is deleted.
   * Subtables are included unless the parameters ask to skip their aggregation.
   *
   * @param string|array $recordNames Name(s) of the report we are aggregating, eg, `'Referrers_type'`.
   * @param int $maximumRowsInDataTableLevelZero Maximum number of rows allowed in the top level DataTable.
   * @param int $maximumRowsInSubDataTable Maximum number of rows allowed in each subtable.
   * @param string $columnToSortByBeforeTruncation The name of the column to sort by before truncating a DataTable.
   * @param array $columnsAggregationOperation Operations for aggregating columns, see {@link Row::sumRow()}.
   *                                           Declared by reference; this method only reads it.
   * @param array $columnsToRenameAfterAggregation Columns mapped to new names for columns that must change names
   *                                               when summed because they cannot be summed, eg,
   *                                               `array('nb_uniq_visitors' => 'sum_daily_nb_uniq_visitors')`.
   * @return array Row counts of each aggregated report before truncation, eg,
   *
   *                   array(
   *                       'report1' => array('level0' => $report1->getRowsCount,
   *                                          'recursive' => $report1->getRowsCountRecursive()),
   *                       'report2' => array('level0' => $report2->getRowsCount,
   *                                          'recursive' => $report2->getRowsCountRecursive()),
   *                       ...
   *                   )
   *
   *               When subtables are not aggregated, 'recursive' equals 'level0'.
   * @api
   */
  public function aggregateDataTableRecords($recordNames,
                                            $maximumRowsInDataTableLevelZero = null,
                                            $maximumRowsInSubDataTable = null,
                                            $columnToSortByBeforeTruncation = null,
                                            &$columnsAggregationOperation = null,
                                            $columnsToRenameAfterAggregation = null)
  {
      $reportNames = is_array($recordNames) ? $recordNames : array($recordNames);
      $rowCounts = array();

      foreach ($reportNames as $reportName) {
          // Remember where the table registry stood so that everything created below can be freed.
          $lastTableIdBefore = Manager::getInstance()->getMostRecentTableId();

          $aggregated = $this->aggregateDataTableRecord(
              $reportName,
              $columnsAggregationOperation,
              $columnsToRenameAfterAggregation
          );

          $topLevelRows = $aggregated->getRowsCount();
          $allRows = $topLevelRows;
          if ($this->isAggregateSubTables()) {
              $allRows = $aggregated->getRowsCountRecursive();
          }
          $rowCounts[$reportName] = array(
              'level0'    => $topLevelRows,
              'recursive' => $allRows,
          );

          $serialized = $aggregated->getSerialized(
              $maximumRowsInDataTableLevelZero,
              $maximumRowsInSubDataTable,
              $columnToSortByBeforeTruncation
          );
          Common::destroy($aggregated);

          $this->insertBlobRecord($reportName, $serialized);
          unset($serialized);

          Manager::getInstance()->deleteAll($lastTableIdBefore);
      }

      return $rowCounts;
  }
  /**
   * Aggregates one or more numeric metrics over the sub-periods of the current period and
   * writes each aggregated value as a record for the current period.
   *
   * @param array|string $columns Array of metric names to aggregate.
   * @param bool|string $operationToApply The operation to apply to the metric. Either `'sum'`, `'max'` or `'min'`.
   *                                      When empty, the operation is guessed per column from its name.
   * @return array|int The aggregated values keyed by metric name. If exactly one value was
   *                   aggregated it is returned as is, not in an array.
   *                   For example, if `array('nb_visits', 'nb_hits')` is supplied for `$columns`,
   *
   *                       array(
   *                           'nb_visits' => 3040,
   *                           'nb_hits' => 405
   *                       )
   *
   *                   could be returned. If `array('nb_visits')` or `'nb_visits'` is used for `$columns`,
   *                   then `3040` would be returned.
   * @api
   */
  public function aggregateNumericMetrics($columns, $operationToApply = false)
  {
      $aggregated = $this->getAggregatedNumericMetrics($columns, $operationToApply);

      foreach ($aggregated as $metricName => $metricValue) {
          $this->archiveWriter->insertRecord($metricName, $metricValue);
      }

      // A single aggregated metric is unwrapped; several are returned as the full array.
      return count($aggregated) == 1 ? reset($aggregated) : $aggregated;
  }
  /**
   * Returns the number of visits previously stored with setNumberOfVisits().
   *
   * @return int
   * @throws Exception if the number of visits has not been set yet (it is still `false`).
   */
  public function getNumberOfVisits()
  {
      if ($this->numberOfVisits !== false) {
          return $this->numberOfVisits;
      }

      throw new Exception("visits should have been set here");
  }
  /**
   * Returns the number of converted visits stored with setNumberOfVisits().
   *
   * Unlike getNumberOfVisits(), this getter does not throw when nothing was set;
   * it returns the initial `false` in that case.
   *
   * @return int|bool
   */
  public function getNumberOfVisitsConverted()
  {
      $converted = $this->numberOfVisitsConverted;

      return $converted;
  }
  /**
   * Inserts several numeric records into the archive for this processor's site, period
   * and segment by passing each pair to insertNumericRecord().
   *
   * @param array $numericRecords A name-value mapping of numeric values that should be
   *                              archived, eg,
   *
   *                                  array('Referrers_distinctKeywords' => 23, 'Referrers_distinctCampaigns' => 234)
   * @api
   */
  public function insertNumericRecords($numericRecords)
  {
      foreach ($numericRecords as $recordName => $recordValue) {
          $this->insertNumericRecord($recordName, $recordValue);
      }
  }
  /**
   * Inserts a single numeric record into the archive for this processor's site, period and
   * segment.
   *
   * The value is rounded to two decimal places before it is handed to the archive writer.
   *
   * NOTE(review): earlier documentation stated that values equal to `0` are not inserted.
   * Any such filtering would happen inside the ArchiveWriter and is not visible here - confirm.
   *
   * @param string $name The name of the numeric value, eg, `'Referrers_distinctKeywords'`.
   * @param float $value The numeric value.
   * @api
   */
  public function insertNumericRecord($name, $value)
  {
      $this->archiveWriter->insertRecord($name, round($value, 2));
  }
  /**
   * Inserts one or more blob records into the archive for this processor's site, period
   * and segment. The call is forwarded unchanged to the archive writer.
   *
   * @param string $name The name of the record, eg, 'Referrers_type'.
   * @param string|array $values A blob string or an array of blob strings. If an array
   *                             is used, the first element in the array will be inserted
   *                             with the `$name` name. The others will be inserted with
   *                             `$name . '_' . $index` as the record name (where $index is
   *                             the index of the blob record in `$values`).
   * @api
   */
  public function insertBlobRecord($name, $values)
  {
      $writer = $this->archiveWriter;
      $writer->insertBlobRecord($name, $values);
  }
  /**
   * Loads every DataTable named `$name` over the sub-periods, adds them together and
   * returns the resulting DataTable.
   *
   * Columns listed in the rename map are renamed twice: once on each table of the Map
   * before summing, and once on the summed table.
   *
   * @param string $name
   * @param array $columnsAggregationOperation Operations for aggregating columns, @see Row::sumRow()
   * @param array $columnsToRenameAfterAggregation columns in the array (old name, new name) to be renamed as the
   *                                               sum operation is not valid on them
   *                                               (eg. nb_uniq_visitors->sum_daily_nb_uniq_visitors)
   * @return DataTable
   */
  protected function aggregateDataTableRecord($name, $columnsAggregationOperation = null, $columnsToRenameAfterAggregation = null)
  {
      $withSubTables = $this->isAggregateSubTables();
      $archive = $this->getArchive();

      if ($withSubTables) {
          // By default all sub-tables are aggregated as well.
          // Arguments: record name, idSubTable, depth, addMetadataSubtableId.
          $source = $archive->getDataTableExpanded($name, null, null, false);
      } else {
          // In some cases (eg. Actions plugin when period=range) only the parent
          // table is aggregated, for better performance.
          // Arguments: record name, idSubTable.
          $source = $archive->getDataTable($name, null);
      }

      if ($source instanceof Map) {
          // see https://github.com/piwik/piwik/issues/4377
          // $this cannot be used inside a closure in PHP 5.3, hence the copy.
          $processor = $this;
          $renameMap = $columnsToRenameAfterAggregation;
          $source->filter(function ($subPeriodTable) use ($processor, $renameMap) {
              $processor->renameColumnsAfterAggregation($subPeriodTable, $renameMap);
          });
      }

      $summed = $this->getAggregatedDataTableMap($source, $columnsAggregationOperation);
      $this->renameColumnsAfterAggregation($summed, $columnsToRenameAfterAggregation);

      return $summed;
  }
  /**
   * Builds the column => aggregation operation map for the given columns.
   *
   * @param array $columns Column names.
   * @param bool|string $defaultOperation Operation used for every column. When empty, the
   *                                      operation is guessed per column from its name.
   * @return array Operation name indexed by column name.
   */
  protected function getOperationForColumns($columns, $defaultOperation)
  {
      $guessPerColumn = empty($defaultOperation);
      $operations = array();

      foreach ($columns as $columnName) {
          $operations[$columnName] = $guessPerColumn
              ? $this->guessOperationForColumn($columnName)
              : $defaultOperation;
      }

      return $operations;
  }
  /**
   * Replaces or removes the `nb_uniq_visitors` and `nb_users` columns of an aggregated row.
   *
   * Nothing happens when several sites are being archived and unique visitor processing
   * for multiple sites is skipped, or when the row has neither column. Otherwise, if unique
   * visitors are enabled for the current period label, both columns are set from the values
   * returned by computeNbUniques(); if not, both columns are deleted.
   *
   * @param Row $row Row to update in place.
   */
  protected function enrichWithUniqueVisitorsMetric(Row $row)
  {
      // skip unique visitors metrics calculation if calculating for multiple sites is disabled
      $multipleSites = !$this->getParams()->isSingleSite();
      if ($multipleSites && $this->skipUniqueVisitorsCalculationForMultipleSites) {
          return;
      }

      $hasUniqueColumn = $row->getColumn('nb_uniq_visitors') !== false
          || $row->getColumn('nb_users') !== false;
      if (!$hasUniqueColumn) {
          return;
      }

      $periodLabel = $this->getParams()->getPeriod()->getLabel();
      if (!SettingsPiwik::isUniqueVisitorsEnabled($periodLabel)) {
          $row->deleteColumn('nb_uniq_visitors');
          $row->deleteColumn('nb_users');
          return;
      }

      $metricIds = array(Metrics::INDEX_NB_UNIQ_VISITORS, Metrics::INDEX_NB_USERS);
      $computed = $this->computeNbUniques($metricIds);
      $row->setColumn('nb_uniq_visitors', $computed[Metrics::INDEX_NB_UNIQ_VISITORS]);
      $row->setColumn('nb_users', $computed[Metrics::INDEX_NB_USERS]);
  }
  /**
   * Derives the aggregation operation from a column name: names starting with `max_`
   * aggregate with 'max', names starting with `min_` with 'min', everything else with 'sum'.
   *
   * @param string $column Column name.
   * @return string 'max', 'min' or 'sum'.
   */
  protected function guessOperationForColumn($column)
  {
      $operationByPrefix = array(
          'max_' => 'max',
          'min_' => 'min',
      );

      foreach ($operationByPrefix as $prefix => $operation) {
          if (strpos($column, $prefix) === 0) {
              return $operation;
          }
      }

      return 'sum';
  }
  /**
   * Processes the number of unique visitors (and users) for the given period.
   *
   * This is the only Period metric (ie. week/month/year/range) that we process from the logs directly,
   * since unique visitors cannot be summed like other metrics.
   *
   * @param array $metrics Metrics Ids for which to aggregate the count of values.
   * @return array|bool The row fetched from the aggregation query. The caller indexes it by
   *                    metric id. (Presumably `false` if the query yields no row - depends on
   *                    the statement's fetch(); confirm.)
   */
  protected function computeNbUniques($metrics)
  {
      // No dimensions, no extra WHERE clause, no additional selects: only the requested metrics.
      return $this->getLogAggregator()
          ->queryVisitsByDimension(array(), false, array(), $metrics)
          ->fetch();
  }
  /**
   * Sums the given data into a new DataTable and returns it.
   *
   * If `$data` is a Map, every DataTable it contains (recursively) is added to the result;
   * otherwise `$data` itself is added. A non-empty `$columnsAggregationOperation` is stored
   * on the result as aggregation-operations metadata before anything is added.
   *
   * @param DataTable|DataTable\Map $data
   * @param array $columnsAggregationOperation Operations for aggregating columns.
   * @return DataTable
   */
  protected function getAggregatedDataTableMap($data, $columnsAggregationOperation)
  {
      $sum = new DataTable();

      if (!empty($columnsAggregationOperation)) {
          $sum->setMetadata(DataTable::COLUMN_AGGREGATION_OPS_METADATA_NAME, $columnsAggregationOperation);
      }

      if (!($data instanceof Map)) {
          $sum->addDataTable($data, $this->isAggregateSubTables());
          return $sum;
      }

      // as $date => $tableToSum
      $this->aggregatedDataTableMapsAsOne($data, $sum);

      return $sum;
  }
  /**
   * Adds every DataTable found in `$map` to `$aggregated`, descending into nested Maps.
   *
   * @param Map $map Source tables.
   * @param DataTable $aggregated Destination table, modified in place.
   */
  protected function aggregatedDataTableMapsAsOne(Map $map, DataTable $aggregated)
  {
      foreach ($map->getDataTables() as $entry) {
          if (!($entry instanceof Map)) {
              $aggregated->addDataTable($entry, $this->isAggregateSubTables());
              continue;
          }

          $this->aggregatedDataTableMapsAsOne($entry, $aggregated);
      }
  }
  /**
   * Renames columns of `$table` according to the given map, or according to the class
   * default map when `null` is passed.
   *
   * Note: public only for use in closure in PHP 5.3.
   *
   * @param DataTable $table Table whose columns are renamed in place.
   * @param array|null $columnsToRenameAfterAggregation Old column name => new column name.
   */
  public function renameColumnsAfterAggregation(DataTable $table, $columnsToRenameAfterAggregation = null)
  {
      $renameMap = $columnsToRenameAfterAggregation === null
          ? self::$columnsToRenameAfterAggregation
          : $columnsToRenameAfterAggregation;

      foreach ($renameMap as $currentName => $renamedTo) {
          $table->renameColumn($currentName, $renamedTo, $this->isAggregateSubTables());
      }
  }
  /**
   * Aggregates the requested numeric metrics over the sub-periods and returns them as an
   * array keyed by column name.
   *
   * The unique visitor columns of the aggregated row are recomputed or removed (see
   * enrichWithUniqueVisitorsMetric()), the default column renaming is applied to the
   * aggregated table, and any requested column that is not set in the result is reported as `0`.
   *
   * @param array|string $columns Metric name(s) to aggregate.
   * @param bool|string $operationToApply Operation for all columns, or empty to guess per column.
   * @return array Aggregated values indexed by column name.
   * @throws Exception if the aggregated table holds more than one row.
   */
  protected function getAggregatedNumericMetrics($columns, $operationToApply)
  {
      if (!is_array($columns)) {
          $columns = array($columns);
      }

      $operations = $this->getOperationForColumns($columns, $operationToApply);
      $numericTables = $this->getArchive()->getDataTableFromNumeric($columns);
      $results = $this->getAggregatedDataTableMap($numericTables, $operations);

      if ($results->getRowsCount() > 1) {
          throw new Exception("A DataTable is an unexpected state:" . var_export($results, true));
      }

      $aggregatedRow = $results->getFirstRow();
      if (false === $aggregatedRow) {
          // Nothing was archived for the sub-periods: continue with an empty row.
          $aggregatedRow = new Row;
      }

      $this->enrichWithUniqueVisitorsMetric($aggregatedRow);
      $this->renameColumnsAfterAggregation($results);

      $values = $aggregatedRow->getColumns();
      foreach ($columns as $requested) {
          if (isset($values[$requested])) {
              continue;
          }
          $values[$requested] = 0;
      }

      return $values;
  }
  /**
   * Tells whether subtables take part in aggregation, ie. whether the parameters
   * do NOT ask to skip the aggregation of subtables.
   *
   * @return bool
   */
  protected function isAggregateSubTables()
  {
      $skipSubTables = $this->getParams()->isSkipAggregationOfSubTables();

      return !$skipSubTables;
  }
  474. }
  475. }