Translations of this page?:

This is an old revision of the document!


grapher.php
  1. #!/usr/bin/php
  2. <?php
  // Point DOKU_INC at the directory above this script unless it has been defined already.
  if (!defined('DOKU_INC')) {
      define('DOKU_INC', realpath(__DIR__ . '/../') . '/');
  }
  // Load the DokuWiki bootstrap file from that directory.
  require_once DOKU_INC . 'inc/init.php';
  5.  
  6. class Grapher extends DokuCLI {
  7.  
  /**
   * Register options and arguments on the given $options object
   *
   * Declares the help text, the options depth, media, format and output, and the
   * optional list of namespaces to graph.
   *
   * @param DokuCLI_Options $options
   * @return void
   */
  protected function setup(DokuCLI_Options $options) {
      $options->setHelp('Creates a graph representation of pages and media files and how they are interlinked.');

      $options->registerOption(
          'depth',
          'Recursion depth, eg. how deep to look into the given namespaces. Use 0 for all. Default: 1',
          'd', 'depth'
      );

      $options->registerOption(
          'media',
          "How to handle media files. 'ns' includes only media that is located in the given namespaces, ".
          "'all' includes all media files and 'none' ignores the media files completely. ".
          "Default: ns",
          'm', 'ns|all|none'
      );

      // the help text has to describe every format main() accepts, including json
      $options->registerOption(
          'format',
          "The wanted output format. 'dot' is a very simple format which can be used to visualize the resulting ".
          "graph with graphviz. The 'gexf' format is a more complex XML-based format which contains more info ".
          "about the found nodes and can be loaded in Gephi. The 'json' format writes the nodes and their links ".
          "as a JSON document. Default: dot",
          'f', 'dot|gexf|json'
      );

      $options->registerOption(
          'output',
          "Where to store the output eg. a filename. If not given the output is written to STDOUT.",
          'o', 'file'
      );

      $options->registerArgument(
          'namespaces',
          "Give all wiki namespaces you want to have graphed. If no namespace is given, the root ".
          "namespace is assumed.",
          false
      );
  }
  43.  
  /**
   * Run the grapher
   *
   * Reads the already parsed options, rejects unknown media and format values,
   * gathers the graph data for the requested namespaces and writes it to the
   * chosen output in the chosen format.
   *
   * @param DokuCLI_Options $options
   * @return void
   */
  protected function main(DokuCLI_Options $options) {
      $depth = $options->getOpt('depth', 1);

      $media = $options->getOpt('media', 'ns');
      $knownMediaModes = array('ns', 'all', 'none');
      if(!in_array($media, $knownMediaModes)) {
          $this->fatal('Bad media option: ' . $media);
      }

      $format = $options->getOpt('format', 'dot');
      $knownFormats = array('dot', 'gexf','json');
      if(!in_array($format, $knownFormats)) {
          $this->fatal('Bad format option: ' . $format);
      }

      // "-" (which is also the default) selects standard output
      $output = $options->getOpt('output', '-');
      if($output == '-') {
          $output = 'php://stdout';
      }

      // without any namespace argument the import starts at the top
      $namespaces = array_map('cleanID', $options->args);
      if(count($namespaces) === 0) {
          $namespaces = array('');
      }

      $fh = @fopen($output, 'w');
      if(!$fh) {
          $this->fatal("Failed to open $output");
      }

      $data = $this->gather_data($namespaces, $depth, $media);
      switch($format) {
          case 'dot':
              $this->create_dot($data, $fh);
              break;
          case 'gexf':
              $this->create_gexf($data, $fh);
              break;
          case 'json':
              $this->create_json($data, $fh);
              break;
      }
      fclose($fh);
  }
  81.  
  /**
   * Find all the node and edge data for the given namespaces
   *
   * Pages (and, when $incmedia is 'ns', media files) are collected with search() below
   * every given namespace. Afterwards the cached parser instructions of each found page
   * are scanned for internal links and internal media.
   *
   * The returned array has two keys:
   *  - 'pages' maps a page id to an array with the keys title, ns, size, time, links,
   *    linklabel, media and lang. links and linklabel are parallel, zero-based lists
   *    without duplicate targets: linklabel[n] is the label used for links[n].
   *  - 'media' maps a media id to an array with the keys title, size, ns and time.
   *
   * @param array  $namespaces namespaces to look into, '' starts at the top
   * @param int    $depth      handed to search() as its 'depth' option
   * @param string $incmedia   'ns', 'all' or 'none'
   * @return array
   */
  protected function gather_data($namespaces, $depth = 0, $incmedia = 'ns') {
      global $conf;
      /** @var helper_plugin_translation $transplugin */
      $transplugin = plugin_load('helper', 'translation');

      $pages = array();
      $media = array();
      foreach($namespaces as $ns) {
          // search() is given the namespace as a relative directory
          $dir = str_replace(':', '/', $ns);

          // find media
          if($incmedia == 'ns') {
              $data = array();
              search(
                  $data,
                  $conf['mediadir'],
                  'search_universal',
                  array(
                      'depth' => $depth,
                      'listfiles' => true,
                      'listdirs' => false,
                      'pagesonly' => false,
                      'skipacl' => true,
                      'keeptxt' => true,
                      'meta' => true,
                  ),
                  $dir
              );

              // go through all those media files
              while($item = array_shift($data)) {
                  $media[$item['id']] = array(
                      'title' => noNS($item['id']),
                      'size' => $item['size'],
                      'ns' => getNS($item['id']),
                      'time' => $item['mtime'],
                  );
              }
          }

          // find pages
          $data = array();
          search(
              $data,
              $conf['datadir'],
              'search_universal',
              array(
                  'depth' => $depth,
                  'listfiles' => true,
                  'listdirs' => false,
                  'pagesonly' => true,
                  'skipacl' => true,
                  'firsthead' => true,
                  'meta' => true,
              ),
              $dir
          );

          // ns start page: a page that has the same id as the namespace itself
          if($ns && page_exists($ns)) {
              $data[] = array(
                  'id' => $ns,
                  'ns' => getNS($ns),
                  'title' => p_get_first_heading($ns, false),
                  'size' => filesize(wikiFN($ns)),
                  'mtime' => filemtime(wikiFN($ns)),
                  'perm' => 16,
                  'type' => 'f',
                  'level' => 0,
                  'open' => 1,
              );
          }

          // go through all those pages
          while($item = array_shift($data)) {
              // prefer the creation date from the metadata, fall back to the file's mtime
              $time = (int) p_get_metadata($item['id'], 'date created', false);
              if(!$time) $time = $item['mtime'];
              $lang = ($transplugin) ? $transplugin->getLangPart($item['id']) : '';

              // strip the language prefix from the namespace; quote it so it is matched literally
              if($lang) {
                  $item['ns'] = preg_replace('/^' . preg_quote($lang, '/') . '(:|$)/', '', $item['ns']);
              }

              $pages[$item['id']] = array(
                  'title' => $item['title'],
                  'ns' => $item['ns'],
                  'size' => $item['size'],
                  'time' => $time,
                  'links' => array(),
                  'linklabel' => array(),
                  'media' => array(),
                  'lang' => $lang
              );
          }
      }

      // now get links and media
      foreach($pages as $pid => $item) {
          // get instructions
          $ins = p_cached_instructions(wikiFN($pid), false, $pid);
          // find links and media usage
          foreach($ins as $i) {
              $mid = null;

              if($i[0] == 'internallink') {
                  $id = $i[1][0];
                  $label = $i[1][1];
                  $exists = true;
                  resolve_pageid($item['ns'], $id, $exists);
                  list($id) = explode('#', $id, 2);
                  if($id == $pid) continue; // skip self references

                  // Record each target only once, together with the label of its first
                  // occurrence. Deduplicating here (instead of array_unique() afterwards)
                  // keeps links and linklabel aligned and free of index gaps.
                  if($exists && isset($pages[$id]) && !in_array($id, $pages[$pid]['links'], true)) {
                      $pages[$pid]['links'][] = $id;
                      $pages[$pid]['linklabel'][] = $label;
                  }

                  if(is_array($label) && $label['type'] == 'internalmedia') {
                      $mid = $label['src']; // image link
                  } else {
                      continue; // we're done here
                  }
              }

              if($i[0] == 'internalmedia') {
                  $mid = $i[1][0];
              }

              if(is_null($mid)) continue;
              if($incmedia == 'none') continue; // no media wanted

              $exists = true;
              resolve_mediaid($item['ns'], $mid, $exists);
              list($mid) = explode('#', $mid, 2);
              $mid = cleanID($mid);

              if($exists) {
                  if($incmedia == 'all') {
                      if(!isset($media[$mid])) { //add node
                          $media[$mid] = array(
                              'size' => filesize(mediaFN($mid)),
                              'time' => filemtime(mediaFN($mid)),
                              'ns' => getNS($mid),
                              'title' => noNS($mid),
                          );
                      }
                      $pages[$pid]['media'][] = $mid;
                  } elseif(isset($media[$mid])) {
                      // 'ns' mode: only media that was found below the given namespaces
                      $pages[$pid]['media'][] = $mid;
                  }
              }
          }

          // clean up duplicates; array_values() closes the gaps array_unique() leaves behind
          $pages[$pid]['media'] = array_values(array_unique($pages[$pid]['media']));
      }

      return array('pages' => $pages, 'media' => $media);
  }
  243.  
  /**
   * Create a Graphviz dot representation
   *
   * Writes a fixed graph preamble, one node per page and one edge per page link to $fh.
   * The first link of a page becomes the node's URL and the first media file of a page
   * is referenced as the node's shapefile. Media files get no nodes of their own.
   *
   * @param array    $data array with the key 'pages' as returned by gather_data()
   * @param resource $fh   writable stream the graph is written to
   */
  protected function create_dot(&$data, $fh) {
      $pages =& $data['pages'];

      fwrite($fh, "digraph G {\n");
      fwrite($fh, "labelloc=\"b\";\nlabel=\"Actors of a Persecution - https:\/\/challengepower.info - #FreeAssange\";");
      fwrite($fh, "bgcolor=\"white\";
node [shape = \"circle\", fontsize=16, color=black, bgcolor=aquamarine];
edge [arrowsize=2, color=black];
root=\"page-the_actors:assange\";

subgraph corporations {
\"page-the_actors:corporations:darktrace\" ,
\"page-the_actors:corporations:thales\" ,
\"page-the_actors:corporations:sc_strategy_limited\",
\"page-the_actors:corporations:startfor\",
\"page-the_actors:corporations:uc_global_s.l\"
}

subgraph leaks {
\"page-the_actors:leaks:global_intelligences_files\" ,
\"page-the_actors:leaks:spy_files\",
\"page-the_actors:leaks:vault7\"
}

subgraph countries {
\"page-the_actors:ukgov:start\",
\"page-the_actors:usgov:start\",
\"page-the_actors:sweden:start\",
\"page-the_actors:ecuador:start\"

}
\n");

      // create all nodes first
      foreach($pages as $id => $page) {
          // quotes and backslashes would otherwise end or corrupt the quoted dot string
          $title = addcslashes((string) $page['title'], "\"\\");
          fwrite($fh, " \"page-$id\" [\nshape=note,\nlabel=\"$title\",\n");

          // look for links; the keys may have gaps, so take the first element, not index 0
          $links = array_values($page['links']);
          if(!empty($links[0])) {
              fwrite($fh, "URL=\"/" . str_replace(':', '/', $links[0]) . "\"");
          } else {
              fwrite($fh, "URL=\"#\"");
          }

          // look for media
          $mediaIds = array_values($page['media']);
          if(!empty($mediaIds[0])) {
              $path = explode(':', $mediaIds[0]);
              $file = count($path);
              // every segment but the last one is a directory below data/media/
              $folder = '';
              for($n = 0; $n < $file - 1; $n++) {
                  $folder .= $path[$n] . '/';
              }
              // media in the root namespace has one segment only
              $second = isset($path[1]) ? $path[1] : '';
              fwrite($fh, "\n // " . $mediaIds[0] . " ### " . $file . " ### " . $path[0] . " ### " . $second);
              fwrite($fh, ",\n labelloc=\"b\",\n shapefile=\"" . DOKU_INC . "data/media/" . $folder . "300x300-" . $path[$file - 1] . "\"");
          }
          fwrite($fh, ";\n ];\n");
      }

      // now create all the links
      foreach($pages as $id => $page) {
          // walk the real keys so that a label is looked up under the key of its link
          foreach($page['links'] as $n => $target) {
              $label = isset($page['linklabel'][$n]) ? $page['linklabel'][$n] : '';
              // image links carry an array as label, plain links may carry none at all
              $label = is_scalar($label) ? addcslashes((string) $label, "\"\\") : '';
              fwrite($fh, " \"page-" . $id . "\" -> \"page-" . $target . "\" [color=navy;label=\"" . $label . "\"];\n");
          }
      }
      fwrite($fh, "}\n");
  }
  323.  
  /**
   * Create a GEXF representation
   *
   * Writes the XML header, the node attribute declarations, one node per page and per
   * media file and one edge per page link and per media usage to $fh. All values that
   * end up in XML attributes are passed through htmlspecialchars().
   *
   * @param array    $data array with the keys 'pages' and 'media' as returned by gather_data()
   * @param resource $fh   writable stream the document is written to
   */
  protected function create_gexf(&$data, $fh) {
      $pages =& $data['pages'];
      $media =& $data['media'];

      // Writes a single <node> element. Keys missing in $item are written as empty
      // values: media entries, for example, carry no 'lang'.
      $writeNode = function ($prefix, $type, $id, $item, $shape, $color) use ($fh) {
          $val = array();
          foreach(array('title', 'lang', 'ns', 'time', 'size') as $key) {
              $val[$key] = htmlspecialchars(isset($item[$key]) ? (string) $item[$key] : '');
          }
          $xid = htmlspecialchars((string) $id);

          fwrite($fh, " <node id=\"$prefix-$xid\" label=\"$xid\" start=\"{$val['time']}\">\n");
          fwrite($fh, " <attvalues>\n");
          fwrite($fh, " <attvalue for=\"type\" value=\"$type\" />\n");
          fwrite($fh, " <attvalue for=\"title\" value=\"{$val['title']}\" />\n");
          fwrite($fh, " <attvalue for=\"lang\" value=\"{$val['lang']}\" />\n");
          fwrite($fh, " <attvalue for=\"ns\" value=\"{$val['ns']}\" />\n");
          fwrite($fh, " <attvalue for=\"time\" value=\"{$val['time']}\" />\n");
          fwrite($fh, " <attvalue for=\"size\" value=\"{$val['size']}\" />\n");
          fwrite($fh, " </attvalues>\n");
          fwrite($fh, " <viz:shape value=\"$shape\" />\n");
          fwrite($fh, " <viz:color $color />\n");
          fwrite($fh, " </node>\n");
      };

      fwrite($fh, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
      fwrite(
          $fh, "<gexf xmlns=\"http://www.gexf.net/1.1draft\" version=\"1.1\"
xmlns:viz=\"http://www.gexf.net/1.1draft/viz\">\n"
      );
      fwrite($fh, " <meta lastmodifieddate=\"" . date('Y-m-d H:i:s') . "\">\n");
      fwrite($fh, " <creator>DokuWiki</creator>\n");
      fwrite($fh, " </meta>\n");
      fwrite($fh, " <graph mode=\"dynamic\" defaultedgetype=\"directed\">\n");

      // define attributes
      fwrite($fh, " <attributes class=\"node\">\n");
      fwrite($fh, " <attribute id=\"title\" title=\"Title\" type=\"string\" />\n");
      fwrite($fh, " <attribute id=\"lang\" title=\"Language\" type=\"string\" />\n");
      fwrite($fh, " <attribute id=\"ns\" title=\"Namespace\" type=\"string\" />\n");
      fwrite($fh, " <attribute id=\"type\" title=\"Type\" type=\"liststring\">\n");
      fwrite($fh, " <default>page|media</default>\n");
      fwrite($fh, " </attribute>\n");
      fwrite($fh, " <attribute id=\"time\" title=\"Created\" type=\"long\" />\n");
      fwrite($fh, " <attribute id=\"size\" title=\"File Size\" type=\"long\" />\n");
      fwrite($fh, " </attributes>\n");

      // create all nodes first
      fwrite($fh, " <nodes>\n");
      foreach($pages as $id => $item) {
          $writeNode('page', 'page', $id, $item, 'square', 'r="173" g="216" b="230"');
      }
      foreach($media as $id => $item) {
          $writeNode('media', 'media', $id, $item, 'disc', 'r="244" g="164" b="96"');
      }
      fwrite($fh, " </nodes>\n");

      // now create all the edges; edge ids are numbered from 1 across all pages
      fwrite($fh, " <edges>\n");
      $cnt = 0;
      foreach($pages as $id => $page) {
          $source = htmlspecialchars((string) $id);
          foreach($page['links'] as $link) {
              $cnt++;
              $target = htmlspecialchars((string) $link);
              fwrite($fh, " <edge id=\"$cnt\" source=\"page-$source\" target=\"page-$target\" />\n");
          }
          foreach($page['media'] as $link) {
              $cnt++;
              $target = htmlspecialchars((string) $link);
              fwrite($fh, " <edge id=\"$cnt\" source=\"page-$source\" target=\"media-$target\" />\n");
          }
      }
      fwrite($fh, " </edges>\n");

      fwrite($fh, " </graph>\n");
      fwrite($fh, "</gexf>\n");
  }
  410.  
  /**
   * Create a JSON representation
   *
   * Writes one JSON object with three keys to $fh:
   *  - "root":  the namespace of the first page, or an empty string without pages
   *  - "nodes": one object per page with id, name (the title, passed through
   *             htmlspecialchars() as before), value (the size as a string), type (the
   *             namespace without its first four characters) and, when the page uses
   *             media, img (path of the 300x300 variant of the first media file)
   *  - "links": one object per page link with source, target and value (the link label)
   *
   * The document is built with json_encode() so separators and escaping are always valid.
   *
   * @param array    $data array with the key 'pages' as returned by gather_data()
   * @param resource $fh   writable stream the document is written to
   * @throws RuntimeException when the data can not be encoded as JSON
   */
  protected function create_json(&$data, $fh) {
      $pages =& $data['pages'];

      // the namespace of the first page names the root; there may be no page at all
      $root_name = '';
      foreach($pages as $first) {
          $root_name = (string) $first['ns'];
          break;
      }

      // create all nodes first
      $nodes = array();
      foreach($pages as $id => $item) {
          $node = array(
              'id' => (string) $id,
              'name' => htmlspecialchars((string) $item['title']),
              'value' => (string) $item['size'],
              'type' => (string) substr((string) $item['ns'], 4),
          );

          // the keys of the media list may have gaps, so take the first element
          $mediaIds = array_values($item['media']);
          if(!empty($mediaIds[0])) {
              $path = explode(':', $mediaIds[0]);
              $file = array_pop($path);
              // what is left of $path are the directories below data/media/
              $folder = $path ? implode('/', $path) . '/' : '';
              $node['img'] = DOKU_INC . 'data/media/' . $folder . '300x300-' . $file;
          }
          $nodes[] = $node;
      }

      // now create all the edges
      $links = array();
      foreach($pages as $id => $page) {
          // the label belongs to the link stored under the same key of the same page
          foreach($page['links'] as $n => $link) {
              $label = isset($page['linklabel'][$n]) ? $page['linklabel'][$n] : '';
              // image links carry an array as label, plain links may carry none at all
              if(!is_scalar($label)) $label = '';
              $links[] = array(
                  'source' => (string) $id,
                  'target' => (string) $link,
                  'value' => (string) $label,
              );
          }
      }

      $output = json_encode(
          array('root' => $root_name, 'nodes' => $nodes, 'links' => $links),
          JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE
      );
      if($output === false) {
          throw new RuntimeException('Failed to encode the graph as JSON: ' . json_last_error_msg());
      }

      fwrite($fh, $output . "\n");
  }
  476.  
  477. }
  478.  
  // Script entry point: create the command line tool and start it through its run() method.
  $grapher = new Grapher;
  $grapher->run();
You could leave a comment if you were logged in.
  • dev/grapher-php.1604245495.txt.gz
  • Last modified: 2020/11/01 15:44
  • by bab