Translations of this page?:

This is an old revision of the document!


grapher.php
  1. #!/usr/bin/php
  2. <?php
  3. if(!defined('DOKU_INC')) define('DOKU_INC', realpath(dirname(__FILE__) . '/../') . '/');
  4. require_once(DOKU_INC . 'inc/init.php');
  5.  
  6. class Grapher extends DokuCLI {
  7.  
  8. /**
  9.   * Register options and arguments on the given $options object
  10.   *
  11.   * @param DokuCLI_Options $options
  12.   * @return void
  13.   */
  14. protected function setup(DokuCLI_Options $options) {
  15. $options->setHelp('Creates a graph representation of pages and media files and how they are interlinked.');
  16. $options->registerOption(
  17. 'depth',
  18. 'Recursion depth, eg. how deep to look into the given namespaces. Use 0 for all. Default: 1',
  19. 'd', 'depth');
  20. $options->registerOption(
  21. 'media',
  22. "How to handle media files. 'ns' includes only media that is located in the given namespaces, ".
  23. "'all' includes all media files and 'none' ignores the media files completely. ".
  24. "Default: ns",
  25. 'm', 'ns|all|none');
  26. $options->registerOption(
  27. 'format',
  28. "The wanted output format. 'dot' is a very simple format which can be used to visualize the resulting ".
  29. "graph with graphviz. The 'gexf' format is a more complex XML-based format which contains more info ".
  30. "about the found nodes and can be loaded in Gephi. Default: dot",
  31. 'f', 'dot|gexf');
  32. $options->registerOption(
  33. 'output',
  34. "Where to store the output eg. a filename. If not given the output is written to STDOUT.",
  35. 'o', 'file');
  36. $options->registerArgument(
  37. 'namespaces',
  38. "Give all wiki namespaces you want to have graphed. If no namespace is given, the root ".
  39. "namespace is assumed.",
  40. false
  41. );
  42. }
  43.  
  44. /**
  45.   * Your main program
  46.   *
  47.   * Arguments and options have been parsed when this is run
  48.   *
  49.   * @param DokuCLI_Options $options
  50.   * @return void
  51.   */
  52. protected function main(DokuCLI_Options $options) {
  53. $depth = $options->getOpt('depth', 1);
  54. $media = $options->getOpt('media', 'ns');
  55. if(!in_array($media, array('ns', 'all', 'none'))) {
  56. $this->fatal('Bad media option: ' . $media);
  57. }
  58. $format = $options->getOpt('format', 'dot');
  59. if(!in_array($format, array('dot', 'gexf'))) {
  60. $this->fatal('Bad format option: ' . $format);
  61. }
  62. $output = $options->getOpt('output', '-');
  63. if($output == '-') $output = 'php://stdout';
  64.  
  65. $namespaces = array_map('cleanID', $options->args);
  66. if(!count($namespaces)) $namespaces = array(''); //import from top
  67.  
  68. $fh = @fopen($output, 'w');
  69. if(!$fh) $this->fatal("Failed to open $output");
  70.  
  71. $data = $this->gather_data($namespaces, $depth, $media);
  72. if($format == 'dot') {
  73. $this->create_dot($data, $fh);
  74. } elseif($format == 'gexf') {
  75. $this->create_gexf($data, $fh);
  76. }
  77.  
  78. fclose($fh);
  79. }
  80.  
  81. /**
  82.   * Find all the node and edge data for the given namespaces
  83.   * @param $namespaces
  84.   * @param int $depth
  85.   * @param string $incmedia
  86.   * @return array
  87.   */
  88. protected function gather_data($namespaces, $depth = 0, $incmedia = 'ns') {
  89. global $conf;
  90. /** @var helper_plugin_translation $transplugin */
  91. $transplugin = plugin_load('helper', 'translation');
  92.  
  93. $pages = array();
  94. $media = array();
  95. foreach($namespaces as $ns) {
  96. // find media
  97. if($incmedia == 'ns') {
  98. $data = array();
  99. search(
  100. $data,
  101. $conf['mediadir'],
  102. 'search_universal',
  103. 'depth' => $depth,
  104. 'listfiles' => true,
  105. 'listdirs' => false,
  106. 'pagesonly' => false,
  107. 'skipacl' => true,
  108. 'keeptxt' => true,
  109. 'meta' => true,
  110. ),
  111. str_replace(':', '/', $ns)
  112. );
  113.  
  114. // go through all those media files
  115. while($item = array_shift($data)) {
  116. $media[$item['id']] = array(
  117. 'title' => noNS($item['id']),
  118. 'size' => $item['size'],
  119. 'ns' => getNS($item['id']),
  120. 'time' => $item['mtime'],
  121. );
  122. }
  123. }
  124.  
  125. // find pages
  126. $data = array();
  127. search(
  128. $data,
  129. $conf['datadir'],
  130. 'search_universal',
  131. 'depth' => $depth,
  132. 'listfiles' => true,
  133. 'listdirs' => false,
  134. 'pagesonly' => true,
  135. 'skipacl' => true,
  136. 'firsthead' => true,
  137. 'meta' => true,
  138. ),
  139. str_replace(':', '/', $ns)
  140. );
  141.  
  142. // ns start page
  143. if($ns && page_exists($ns)) {
  144. $data[] = array(
  145. 'id' => $ns,
  146. 'ns' => getNS($ns),
  147. 'title' => p_get_first_heading($ns, false),
  148. 'size' => filesize(wikiFN($ns)),
  149. 'mtime' => filemtime(wikiFN($ns)),
  150. 'perm' => 16,
  151. 'type' => 'f',
  152. 'level' => 0,
  153. 'open' => 1,
  154. );
  155. }
  156.  
  157. // go through all those pages
  158. while($item = array_shift($data)) {
  159. $time = (int) p_get_metadata($item['id'], 'date created', false);
  160. if(!$time) $time = $item['mtime'];
  161. $lang = ($transplugin) ? $transplugin->getLangPart($item['id']) : '';
  162.  
  163. if($lang) $item['ns'] = preg_replace('/^' . $lang . '(:|$)/', '', $item['ns']);
  164.  
  165. $pages[$item['id']] = array(
  166. 'title' => $item['title'],
  167. 'ns' => $item['ns'],
  168. 'size' => $item['size'],
  169. 'time' => $time,
  170. 'links' => array(),
  171. 'linklabel' => array(),
  172. 'media' => array(),
  173. 'lang' => $lang
  174. );
  175. }
  176. }
  177.  
  178. // now get links and media
  179. foreach($pages as $pid => $item) {
  180. // get instructions
  181. $ins = p_cached_instructions(wikiFN($pid), false, $pid);
  182. // find links and media usage
  183. foreach($ins as $i) {
  184. $mid = null;
  185.  
  186. if($i[0] == 'internallink') {
  187. $id = $i[1][0];
  188. $label = $i[1][1];
  189. $exists = true;
  190. resolve_pageid($item['ns'], $id, $exists);
  191. list($id) = explode('#', $id, 2);
  192. if($id == $pid) continue; // skip self references
  193. if($exists && isset($pages[$id])) {
  194. $pages[$pid]['links'][] = $id;
  195. $pages[$pid]['linklabel'][] = $label;
  196. }
  197. if(is_array($i[1][1]) && $i[1][1]['type'] == 'internalmedia') {
  198. $mid = $i[1][1]['src']; // image link
  199. } else {
  200. continue; // we're done here
  201. }
  202. }
  203.  
  204. if($i[0] == 'internalmedia') {
  205. $mid = $i[1][0];
  206. }
  207.  
  208. if(is_null($mid)) continue;
  209. if($incmedia == 'none') continue; // no media wanted
  210.  
  211. $exists = true;
  212. resolve_mediaid($item['ns'], $mid, $exists);
  213. list($mid) = explode('#', $mid, 2);
  214. $mid = cleanID($mid);
  215.  
  216. if($exists) {
  217. if($incmedia == 'all') {
  218. if(!isset($media[$mid])) { //add node
  219. $media[$mid] = array(
  220. 'size' => filesize(mediaFN($mid)),
  221. 'time' => filemtime(mediaFN($mid)),
  222. 'ns' => getNS($mid),
  223. 'title' => noNS($mid),
  224. );
  225. }
  226. $pages[$pid]['media'][] = $mid;
  227. } elseif(isset($media[$mid])) {
  228. $pages[$pid]['media'][] = $mid;
  229. }
  230. }
  231. }
  232.  
  233. // clean up duplicates
  234. $pages[$pid]['links'] = array_unique($pages[$pid]['links']);
  235. $pages[$pid]['media'] = array_unique($pages[$pid]['media']);
  236. }
  237. //var_dump($pages);
  238.  
  239. return array('pages' => $pages, 'media' => $media);
  240.  
  241.  
  242. }
  243.  
  244. /**
  245.   * Create a Graphviz dot representation
  246.   *
  247.   * @param array $data
  248.   * @param resource $fh
  249.   */
  250. protected function create_dot(&$data, $fh) {
  251. $pages =& $data['pages'];
  252. $media =& $data['media'];
  253.  
  254. fwrite($fh, "digraph G {\n");
  255. fwrite($fh, "bgcolor=\"white\";
  256. node [shape = \"circle\", fontsize=16, color=black, bgcolor=aquamarine];
  257. edge [arrowsize=2, color=black];
  258. \n");
  259.  
  260. // create all nodes first
  261. foreach($pages as $id => $page) {
  262. fwrite($fh, " \"page-$id\" [
  263. shape=note,
  264. label=\"{$page['title']}\",\n");
  265. // look for links
  266. if (!empty( $page['links'][0]))
  267. fwrite($fh,"URL=\"/". str_replace(':','/',$page['links'][0]) ."\"");
  268. else fwrite($fh,"URL=\"#\"");
  269. // look for media
  270. if (!empty( $page['media'][0]))
  271. {
  272. $path = explode (":", $page['media'][0]);
  273. $file = count($path );
  274. $folder="";
  275. for($i=0 ; $i < $file-1 ; $i++)
  276. {
  277. $folder = $folder . $path[$i] . "/";
  278. }
  279. fwrite($fh,"\n // " . $page['media'][0] . " ### " . $file ." ### ". $path[0] . " ### ". $path[1]);
  280. fwrite($fh,",\n labelloc=\"b\",\n shapefile=\"".DOKU_INC. "data/media/" . $folder . "300x300-" . $path[$file-1] . "\"");
  281. }
  282. fwrite($fh,";\n ];\n");
  283. }
  284. // var_dump($pages);
  285. /*
  286.   foreach($media as $id => $item) {
  287.   fwrite($fh, " \"media-$id\" [shape=box, label=\"$id\", color=sandybrown, fontname=Helvetica];\n");
  288.   }
  289.   */
  290. // now create all the links
  291. foreach($pages as $id => $page) {
  292. foreach($page['links'] as $link) {
  293. foreach($page['linklabel'] as $label) {
  294. fwrite($fh, " \"page-$id\" -> \"page-$link\" [color=navy;label=\"".$label."\"];\n");
  295. }
  296. }
  297. /*
  298.   foreach($page['media'] as $link) {
  299.   fwrite($fh, " \"page-$id\" -> \"media-$link\" [color=firebrick];\n");
  300.   }
  301.   */
  302. }
  303. fwrite($fh, "}\n");
  304. }
  305.  
  306. /**
  307.   * Create a GEXF representation
  308.   *
  309.   * @param array $data
  310.   * @param resource $fh
  311.   */
  312. protected function create_gexf(&$data, $fh) {
  313. $pages =& $data['pages'];
  314. $media =& $data['media'];
  315.  
  316. fwrite($fh, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
  317. $fh, "<gexf xmlns=\"http://www.gexf.net/1.1draft\" version=\"1.1\"
  318. xmlns:viz=\"http://www.gexf.net/1.1draft/viz\">\n"
  319. );
  320. fwrite($fh, " <meta lastmodifieddate=\"" . date('Y-m-d H:i:s') . "\">\n");
  321. fwrite($fh, " <creator>DokuWiki</creator>\n");
  322. fwrite($fh, " </meta>\n");
  323. fwrite($fh, " <graph mode=\"dynamic\" defaultedgetype=\"directed\">\n");
  324.  
  325. // define attributes
  326. fwrite($fh, " <attributes class=\"node\">\n");
  327. fwrite($fh, " <attribute id=\"title\" title=\"Title\" type=\"string\" />\n");
  328. fwrite($fh, " <attribute id=\"lang\" title=\"Language\" type=\"string\" />\n");
  329. fwrite($fh, " <attribute id=\"ns\" title=\"Namespace\" type=\"string\" />\n");
  330. fwrite($fh, " <attribute id=\"type\" title=\"Type\" type=\"liststring\">\n");
  331. fwrite($fh, " <default>page|media</default>\n");
  332. fwrite($fh, " </attribute>\n");
  333. fwrite($fh, " <attribute id=\"time\" title=\"Created\" type=\"long\" />\n");
  334. fwrite($fh, " <attribute id=\"size\" title=\"File Size\" type=\"long\" />\n");
  335. fwrite($fh, " </attributes>\n");
  336.  
  337. // create all nodes first
  338. fwrite($fh, " <nodes>\n");
  339. foreach($pages as $id => $item) {
  340. $title = htmlspecialchars($item['title']);
  341. $lang = htmlspecialchars($item['lang']);
  342. fwrite($fh, " <node id=\"page-$id\" label=\"$id\" start=\"{$item['time']}\">\n");
  343. fwrite($fh, " <attvalues>\n");
  344. fwrite($fh, " <attvalue for=\"type\" value=\"page\" />\n");
  345. fwrite($fh, " <attvalue for=\"title\" value=\"$title\" />\n");
  346. fwrite($fh, " <attvalue for=\"lang\" value=\"$lang\" />\n");
  347. fwrite($fh, " <attvalue for=\"ns\" value=\"{$item['ns']}\" />\n");
  348. fwrite($fh, " <attvalue for=\"time\" value=\"{$item['time']}\" />\n");
  349. fwrite($fh, " <attvalue for=\"size\" value=\"{$item['size']}\" />\n");
  350. fwrite($fh, " </attvalues>\n");
  351. fwrite($fh, " <viz:shape value=\"square\" />\n");
  352. fwrite($fh, " <viz:color r=\"173\" g=\"216\" b=\"230\" />\n");
  353. fwrite($fh, " </node>\n");
  354. }
  355. foreach($media as $id => $item) {
  356. $title = htmlspecialchars($item['title']);
  357. $lang = htmlspecialchars($item['lang']);
  358. fwrite($fh, " <node id=\"media-$id\" label=\"$id\" start=\"{$item['time']}\">\n");
  359. fwrite($fh, " <attvalues>\n");
  360. fwrite($fh, " <attvalue for=\"type\" value=\"media\" />\n");
  361. fwrite($fh, " <attvalue for=\"title\" value=\"$title\" />\n");
  362. fwrite($fh, " <attvalue for=\"lang\" value=\"$lang\" />\n");
  363. fwrite($fh, " <attvalue for=\"ns\" value=\"{$item['ns']}\" />\n");
  364. fwrite($fh, " <attvalue for=\"time\" value=\"{$item['time']}\" />\n");
  365. fwrite($fh, " <attvalue for=\"size\" value=\"{$item['size']}\" />\n");
  366. fwrite($fh, " </attvalues>\n");
  367. fwrite($fh, " <viz:shape value=\"disc\" />\n");
  368. fwrite($fh, " <viz:color r=\"244\" g=\"164\" b=\"96\" />\n");
  369. fwrite($fh, " </node>\n");
  370. }
  371. fwrite($fh, " </nodes>\n");
  372.  
  373. // now create all the edges
  374. fwrite($fh, " <edges>\n");
  375. $cnt = 0;
  376. foreach($pages as $id => $page) {
  377. foreach($page['links'] as $link) {
  378. $cnt++;
  379. fwrite($fh, " <edge id=\"$cnt\" source=\"page-$id\" target=\"page-$link\" />\n");
  380. }
  381. foreach($page['media'] as $link) {
  382. $cnt++;
  383. fwrite($fh, " <edge id=\"$cnt\" source=\"page-$id\" target=\"media-$link\" />\n");
  384. }
  385. }
  386. fwrite($fh, " </edges>\n");
  387.  
  388. fwrite($fh, " </graph>\n");
  389. fwrite($fh, "</gexf>\n");
  390. }
  391.  
  392. }
  393.  
  394. $grapher = new Grapher();
  395. $grapher->run();
You could leave a comment if you were logged in.
  • dev/grapher-php.1601992769.txt.gz
  • Last modified: 2020/10/06 13:59
  • by bab