Line data Source code
1 : // Copyright (c) 2013-2022 Made to Order Software Corp. All Rights Reserved
2 : //
3 : // https://snapwebsites.org/project/eventdispatcher
4 : // contact@m2osw.com
5 : //
6 : // This program is free software; you can redistribute it and/or modify
7 : // it under the terms of the GNU General Public License as published by
8 : // the Free Software Foundation; either version 2 of the License, or
9 : // (at your option) any later version.
10 : //
11 : // This program is distributed in the hope that it will be useful,
12 : // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 : // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 : // GNU General Public License for more details.
15 : //
16 : // You should have received a copy of the GNU General Public License along
17 : // with this program; if not, write to the Free Software Foundation, Inc.,
18 : // 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
19 :
20 : /** \file
21 : * \brief Implementation of the process_info class.
22 : *
23 : * This file includes code to read data from the `/proc/<pid>/...` folder.
24 : * It parses the data and then saves it in various fields which you can
25 : * retrieve using the various functions available in the process_info
26 : * class.
27 : *
28 : * The data is read when necessary and, in many cases, cached. In other
29 : * words, calling a getter again may return the cached data as opposed to
30 : * data that was just read.
31 : *
32 : * See the proc(5) manual page for details about all the fields found in
33 : * the files under `/proc/<pid>/...`.
34 : *
35 : * \sa https://man7.org/linux/man-pages/man5/proc.5.html
36 : */
37 :
38 : // self
39 : //
40 : #include <cppprocess/process_info.h>
41 :
42 :
43 : // snaplogger lib
44 : //
45 : #include <snaplogger/message.h>
46 :
47 :
48 : // snapdev lib
49 : //
50 : #include <snapdev/file_contents.h>
51 : #include <snapdev/tokenize_string.h>
52 : #include <snapdev/pathinfo.h>
53 :
54 :
55 : // C++ lib
56 : //
57 : #include <iostream>
58 :
59 :
60 : // C lib
61 : //
62 : //#include <proc/readproc.h>
63 : //#include <stdio.h>
64 : //#include <sys/prctl.h>
65 : //#include <sys/wait.h>
66 : //#include <unistd.h>
67 :
68 :
69 : // last include
70 : //
71 : #include <snapdev/poison.h>
72 :
73 :
74 :
75 : namespace cppprocess
76 : {
77 :
78 :
79 :
80 : /** \brief Initialize a process_info object.
81 : *
82 : * This function saves the pid_t of the process. The other functions are
83 : * then able to retrieve process details such as the name, argument list,
84 : * processing times, etc.
85 : *
86 : * Most of the data gets cached, you can also reset a process_info object
87 : * to retrieve the data anew from the corresponding `/proc/<pid>/...` file.
88 : *
89 : * \param[in] pid The pid of the process you're interested in.
90 : */
91 1 : process_info::process_info(pid_t pid)
92 1 : : f_pid(pid)
93 : {
94 1 : }
95 :
96 :
97 : /** \brief Get the process identifier.
98 : *
99 : * This function retrieves the process identifier of this process_info
100 : * object.
101 : *
102 : * This function actually verifies that the process is still alive. If not,
103 : * then it returns -1. Once this function returns -1 once, it will then
104 : * always return -1.
105 : *
106 : * \return The process identifier.
107 : */
108 4 : pid_t process_info::get_pid()
109 : {
110 4 : if(f_pid != -1)
111 : {
112 8 : std::string const filename("/proc/" + std::to_string(f_pid));
113 4 : struct stat st;
114 4 : if(stat(filename.c_str(), &st) != 0)
115 : {
116 0 : f_pid = -1;
117 : }
118 4 : else if(!S_ISDIR(st.st_mode))
119 : {
120 0 : f_pid = -1;
121 : }
122 : }
123 :
124 4 : return f_pid;
125 : }
126 :
127 :
128 : /** \brief Get the parent process identifier.
129 : *
130 : * This function retrieves the parent process identifier of this
131 : * proc_info object.
132 : *
133 : * \return The parent process identifier.
134 : */
135 1 : pid_t process_info::get_ppid()
136 : {
137 1 : load_stat();
138 :
139 1 : return f_ppid;
140 : }
141 :
142 :
143 : /** \brief Get the process main group identifier.
144 : *
145 : * This function returns the process main group identifier. At first this
146 : * is the same as the main group of the user that started the process
147 : * although the process can change that parameter (if given the right to
148 : * do so).
149 : *
150 : * \return The group the process is a part of.
151 : */
152 1 : pid_t process_info::get_pgid()
153 : {
154 1 : load_stat();
155 :
156 1 : return f_pgid;
157 : }
158 :
159 :
160 : /** \brief Get the number of minor & major page faults.
161 : *
162 : * This function retrieves the minor & major number of page faults.
163 : *
164 : * \param[out] major The major page fault since last update.
165 : * \param[out] minor The minor page fault since last update.
166 : *
167 : * \return The process page fault statistics.
168 : */
169 1 : void process_info::get_page_faults(std::uint64_t & major, std::uint64_t & minor)
170 : {
171 1 : load_stat();
172 :
173 1 : minor = f_minor_faults;
174 1 : major = f_major_faults;
175 1 : }
176 :
177 :
178 : // Where did libproc get that from?! (i.e. you'd need to read all the
179 : // process to compute this, I think)
180 : ///** \brief Get the immediate percent of CPU usage for this process.
181 : // *
182 : // * This function retrieves the CPU usage as a percent of total CPU
183 : // * available.
184 : // *
185 : // * \return The immediate CPU usage as a percent.
186 : // */
187 : //unsigned process_info::get_pcpu() const -- TBD
188 : //{
189 : // return f_pcpu;
190 : //}
191 :
192 :
193 : /** \brief Get the immediate process status.
194 : *
195 : * This function retrieves the CPU status of the process.
196 : *
197 : * The status is one of the following:
198 : *
199 : * \li D -- uninterruptible sleep (usually I/O)
200 : * \li R -- running or runnable
201 : * \li S -- Sleeping
202 : * \li T -- stopped by a job control signal or trace
203 : * \li W -- paging (should not occur)
204 : * \li X -- dead (should never appear)
205 : * \li Z -- defunct zombie process
206 : *
207 : * See `man 5 proc` for more details (versions when such and such flag
208 : * as defined).
209 : *
210 : * \warning
211 : * If you set the \p force parameter to false, then the status is not
212 : * updated. In most cases, you want to first call this function to
213 : * update the `stat` data and then read the other data which will be
214 : * what was read at the same time as this status.
215 : *
216 : * \param[in] force Whether to force a reload of the `stat` file.
217 : *
218 : * \return The status of the process.
219 : */
220 1 : process_state_t process_info::get_state(bool force)
221 : {
222 1 : load_stat(force);
223 :
224 1 : return f_state;
225 : }
226 :
227 :
228 : /** \brief Get the amount of time spent by this process.
229 : *
230 : * This function gives you information about the four variables
231 : * available cummulating the amount of time the process spent
232 : * running so far.
233 : *
234 : * \param[out] utime The accumulated user time of this very task.
235 : * \param[out] stime The accumulated kernel time of this very task.
236 : * \param[out] cutime The accumulated user time of this task and
237 : * its children.
238 : * \param[out] cstime The accumulated kernel time of this task
239 : * and its children.
240 : */
241 1 : void process_info::get_times(
242 : unsigned long long & utime
243 : , unsigned long long & stime
244 : , unsigned long long & cutime
245 : , unsigned long long & cstime)
246 : {
247 1 : load_stat();
248 :
249 1 : utime = f_user_time;
250 1 : stime = f_system_time;
251 1 : cutime = f_children_user_time;
252 1 : cstime = f_children_system_time;
253 1 : }
254 :
255 :
256 : /** \brief Get the real time priority of this process.
257 : *
258 : * This function returns the real time priority of the process.
259 : *
260 : * \return The process real time priority.
261 : */
262 0 : int process_info::get_priority()
263 : {
264 0 : load_stat();
265 :
266 0 : return f_priority;
267 : }
268 :
269 :
270 : /** \brief Get the Unix nice of this process.
271 : *
272 : * This function returns the Unix nice of the process.
273 : *
274 : * \return The process unix nice.
275 : */
276 1 : int process_info::get_nice()
277 : {
278 1 : load_stat();
279 :
280 1 : return f_nice;
281 : }
282 :
283 :
284 : /** \brief Get the size of this process.
285 : *
286 : * This function returns the total size of the process defined as
287 : * the virtual memory size.
288 : *
289 : * \todo
290 : * Look at loading the memory info as in `/proc/<pid>/status`.
291 : *
292 : * \return The process total virtual size.
293 : */
294 1 : std::uint64_t process_info::get_total_size()
295 : {
296 1 : load_stat();
297 :
298 1 : return f_rss
299 1 : + f_end_code - f_start_code
300 1 : + f_end_data - f_start_data;
301 : }
302 :
303 :
304 : /** \brief Get the RSS size of this process.
305 : *
306 : * This function returns the RSS size of the process defined as
307 : * the virtual memory size.
308 : *
309 : * \return The process RSS virtual size.
310 : */
311 1 : std::uint64_t process_info::get_rss_size()
312 : {
313 1 : load_stat();
314 :
315 1 : return f_rss;
316 : }
317 :
318 :
319 : /** \brief Get the process name.
320 : *
321 : * This function return the name as found in the `comm` file. This name
322 : * is usually the first 15 letters of the command name. This name can
323 : * be changed so it may different from the one found in the `cmdline`.
324 : * We most often change our thread names to reflect what they are
325 : * used for.
326 : *
327 : * This is similar to the basename, however, the basename will be
328 : * "calculate" from the `cmdline` file instead.
329 : *
330 : * \note
331 : * The same name is found in the `stat` file (between parenthesis) so
332 : * we read it from there and not the `comm` file.
333 : *
334 : * \return The process name.
335 : */
336 1 : std::string process_info::get_name()
337 : {
338 1 : load_stat();
339 :
340 1 : return f_name;
341 : }
342 :
343 :
344 : /** \brief Get the process command.
345 : *
346 : * This function returns the command path and name as defined on the command
347 : * line.
348 : *
349 : * If something goes wrong (i.e. the process dies) then the function returns
350 : * an empty string.
351 : *
352 : * \return The command name and path.
353 : */
354 2 : std::string process_info::get_command()
355 : {
356 2 : load_cmdline();
357 :
358 2 : if(f_args.empty())
359 : {
360 0 : return std::string();
361 : }
362 :
363 2 : return f_args[0];
364 : }
365 :
366 :
367 : /** \brief Get the process (command) basename.
368 : *
369 : * By default, the process name is the full name used on the command line
370 : * to start this process. If that was a full path, then the full pass is
371 : * included in the process name.
372 : *
373 : * This function returns the basename only.
374 : *
375 : * \return The process basename.
376 : */
377 1 : std::string process_info::get_basename()
378 : {
379 1 : return snapdev::pathinfo::basename(get_command());
380 : }
381 :
382 :
383 : /** \brief Get the number of arguments defined on the command line.
384 : *
385 : * This function counts the number of arguments, including any empty
386 : * arguments.
387 : *
388 : * Count will be positive or null. The count does include the command
389 : * line (program name with index 0). This is why this function is
390 : * expected to return 1 or more. However, if the call happens after
391 : * the process died, then you will get zero.
392 : *
393 : * \return Count the number of arguments.
394 : *
395 : * \sa get_arg()
396 : */
397 10 : std::size_t process_info::get_args_size()
398 : {
399 10 : load_cmdline();
400 :
401 10 : return f_args.size();
402 : }
403 :
404 :
405 : /** \brief Get the argument at the specified index.
406 : *
407 : * This function returns one of the arguments of the command line of
408 : * this process. Note that it happens that arguments are empty strings.
409 : * The very first argument (index of 0) is the command full name, just
410 : * like in `argv[]`.
411 : *
412 : * \note
413 : * If the index is out of bounds, the function returns an empty string.
414 : * To know the number of available arguments, use the get_args_size()
415 : * function first.
416 : *
417 : * \param[in] index The index of the argument to retrieve.
418 : *
419 : * \return The specified argument or an empty string.
420 : *
421 : * \sa get_args_size()
422 : */
423 9 : std::string process_info::get_arg(int index)
424 : {
425 9 : if(static_cast<std::size_t>(index) >= get_args_size())
426 : {
427 0 : return std::string();
428 : }
429 :
430 9 : return f_args[index];
431 : }
432 :
433 :
434 : /** \brief Get the controlling terminal major/minor of this process.
435 : *
436 : * This function returns the TTY device major and minor numbers.
437 : *
438 : * This is the controlling terminal. It may return zeroes in which case
439 : * there is no controlling terminal attached to that process.
440 : *
441 : * \param[out] major The major device number of the controlling terminal.
442 : * \param[out] minor The minor device number of the controlling terminal.
443 : */
444 1 : void process_info::get_tty(int & major, int & minor)
445 : {
446 1 : load_stat();
447 :
448 1 : major = f_tty_major;
449 1 : minor = f_tty_minor;
450 1 : }
451 :
452 :
453 :
454 :
455 :
456 :
457 : /** \brief Load the `stat` file.
458 : *
459 : * This function loads the `/proc/<pid>/stat` file.
460 : *
461 : * If force is false (the default) and the file was already read, it does
462 : * not get reloaded. If you want the most current data for this process,
463 : * make sure to call the get_state() function with true first, then call
464 : * the other functions which will then get the updated data.
465 : *
466 : * \note
467 : * The functions that use the data read by the function make sure to
468 : * call it. The fields that are not defined by your Linux kernel will
469 : * generally be set to 0 and never change.
470 : *
471 : * \param[in] force Whether to force a reload.
472 : */
473 10 : void process_info::load_stat(bool force)
474 : {
475 : // already read?
476 : //
477 10 : if(f_ppid != -1
478 9 : && !force)
479 : {
480 8 : return;
481 : }
482 :
483 : // still active?
484 : //
485 2 : pid_t const pid(get_pid());
486 2 : if(pid == -1)
487 : {
488 0 : return;
489 : }
490 :
491 : // read stat
492 : //
493 4 : snapdev::file_contents s("/proc/" + std::to_string(pid) + "/stat");
494 2 : s.size_mode(snapdev::file_contents::size_mode_t::SIZE_MODE_READ);
495 2 : if(!s.read_all())
496 : {
497 0 : return;
498 : }
499 :
500 : // first we must extract the name because it can include spaces and
501 : // parenthesis so it completely breaks the rest of the parser otherwise
502 : //
503 4 : std::string line(s.contents());
504 2 : std::string::size_type const first_paren(line.find('('));
505 2 : std::string::size_type const last_paren(line.rfind(')'));
506 :
507 : // name not found!?
508 : //
509 2 : if(first_paren < 2
510 2 : || first_paren == std::string::npos
511 2 : || last_paren > 100)
512 : {
513 0 : return;
514 : }
515 :
516 : // pid mismatch?!
517 : //
518 2 : if(line.substr(0, first_paren - 1) != std::to_string(pid))
519 : {
520 0 : return;
521 : }
522 :
523 : // retrieve name
524 : //
525 2 : f_name = line.substr(first_paren + 1, last_paren - first_paren - 1);
526 :
527 : // retrieve the remaining fields (many)
528 : //
529 : // TODO: I don't think we need to (1) tokenize and then (2) convert
530 : // to integers, instead we want to consider a function which
531 : // converts directly to integers and saves the values to the
532 : // vector
533 : //
534 4 : std::vector<std::string> fields;
535 4 : std::string remaining(line.substr(last_paren + 2));
536 2 : snapdev::tokenize_string(fields, remaining, " ");
537 :
538 2 : if(fields.size() >= 1)
539 : {
540 2 : f_state = static_cast<process_state_t>(fields[0][0]);
541 : }
542 : else
543 : {
544 0 : f_state = process_state_t::PROCESS_STATE_UNKNOWN;
545 : }
546 :
547 2 : if(fields.size() <= 1)
548 : {
549 0 : return;
550 : }
551 :
552 : // convert the fields to numbers
553 : // except for the status (first in `fields`), all are positive numbers
554 : //
555 4 : std::vector<std::int64_t> values;
556 2 : values.reserve(fields.size() - 1);
557 4 : std::transform(
558 4 : fields.cbegin() + 1
559 : , fields.cend()
560 : , std::back_inserter(values)
561 102 : , [](auto const & v) { return std::stoull(v); });
562 :
563 : // since the vector starts with the PPID, I include an offset
564 : // so that way the index looks like the one found in the docs
565 : //
566 2 : constexpr int const field_offset(4);
567 :
568 : // the number of values can vary so we use a function to make sure
569 : // we don't go over the maximum number of values available in our
570 : // vector
571 : //
572 88 : auto get_value = [values](int idx) {
573 84 : std::size_t const offset(idx - field_offset);
574 84 : return offset >= values.size()
575 168 : ? 0
576 168 : : values[offset];
577 4 : };
578 :
579 : // TBD: for the below values we could also consider using an array and
580 : // use indices defined in an enum and then have a form of mapping
581 : // which would ease updates (although I don't think the kernel
582 : // makes changes to those values much of the time)
583 :
584 2 : f_ppid = get_value(4);
585 2 : f_pgid = get_value(5);
586 2 : f_session = get_value(6);
587 :
588 2 : std::uint32_t const tty(get_value(7));
589 2 : f_tty_major = (tty >> 8) & 0xffff;
590 4 : f_tty_minor = ((tty >> 16) & 0xff00)
591 2 : | ((tty >> 0) & 0x00ff);
592 :
593 2 : f_fp_group = get_value(8);
594 2 : f_kernel_flags = get_value(9); // see PF_* in /usr/src/linux-headers-<version>/include/linux/sched.h
595 2 : f_minor_faults = get_value(10);
596 2 : f_children_minor_faults = get_value(11);
597 2 : f_major_faults = get_value(12);
598 2 : f_children_major_faults = get_value(13);
599 2 : f_user_time = get_value(14);
600 2 : f_system_time = get_value(15);
601 2 : f_children_user_time = get_value(16);
602 2 : f_children_system_time = get_value(17);
603 2 : f_priority = get_value(18);
604 2 : f_nice = get_value(19);
605 2 : f_num_threads = get_value(20); // earlier versions of Linux used this field for something else
606 : // skip 21
607 2 : f_start_time = get_value(22);
608 2 : f_virtual_size = get_value(23);
609 2 : f_rss = get_value(24);
610 2 : f_rss_limit = get_value(25);
611 2 : f_start_code = get_value(26);
612 2 : f_end_code = get_value(27);
613 2 : f_start_stack = get_value(28);
614 2 : f_kernel_esp = get_value(29);
615 2 : f_kernel_eip = get_value(30);
616 : // skip 31
617 : // skip 32
618 : // skip 33
619 : // skip 34
620 2 : f_wchan = get_value(35);
621 : // skip 36
622 : // skip 37
623 2 : f_exit_signal = get_value(38);
624 2 : f_processor = get_value(39);
625 2 : f_rt_priority = get_value(40);
626 2 : f_schedule_policy = get_value(41); // see SCHED_* in /usr/src/linux-headers-<version>/include/linux/sched.h
627 2 : f_delayacct_blkio_ticks = get_value(42);
628 2 : f_guest_time = get_value(43);
629 2 : f_children_guest_time = get_value(44);
630 2 : f_start_data = get_value(45);
631 2 : f_end_data = get_value(46);
632 2 : f_start_break = get_value(47);
633 2 : f_arg_start = get_value(48);
634 2 : f_arg_end = get_value(49);
635 2 : f_env_start = get_value(50);
636 2 : f_env_end = get_value(51);
637 2 : f_exit_code = get_value(52);
638 : }
639 :
640 :
641 : /** \brief Load the command line.
642 : *
643 : * This function loads the command line and arguments once. It will be
644 : * cached when further calls happen.
645 : *
646 : * It will fill the f_args vector of strings. 0 will be the full path and
647 : * command name. The other strings are the arguments. The array is not
648 : * ended with a nullptr. Use the vector::size() to know whether you reached
649 : * the end or not.
650 : */
651 12 : void process_info::load_cmdline()
652 : {
653 : // already loaded?
654 : //
655 12 : if(!f_args.empty())
656 : {
657 11 : return;
658 : }
659 :
660 : // still active?
661 : //
662 1 : pid_t const pid(get_pid());
663 1 : if(pid == -1)
664 : {
665 0 : return;
666 : }
667 :
668 : // read cmdline
669 : //
670 2 : snapdev::file_contents cl("/proc/" + std::to_string(pid) + "/cmdline");
671 1 : cl.size_mode(snapdev::file_contents::size_mode_t::SIZE_MODE_READ);
672 1 : if(!cl.read_all())
673 : {
674 0 : return;
675 : }
676 :
677 2 : std::string const cmdline(cl.contents());
678 :
679 : // the following gives us the ability to handle the last string even if
680 : // not terminated with a null character
681 : //
682 1 : char const * l(cmdline.c_str());
683 1 : char const * e(l + cmdline.length());
684 : for(;;)
685 : {
686 10 : char const * s(l);
687 426 : while(l < e && *l != '\0')
688 : {
689 208 : ++l;
690 : }
691 10 : f_args.emplace_back(s, static_cast<std::string::size_type>(l - s));
692 10 : ++l;
693 10 : if(l >= e)
694 : {
695 1 : break;
696 : }
697 9 : }
698 : }
699 :
700 :
701 :
702 :
703 :
704 :
705 :
706 6 : } // namespace cppprocess
707 : // vim: ts=4 sw=4 et
|