Cppcheck
tokenize.h
Go to the documentation of this file.
1 /*
2  * Cppcheck - A tool for static C/C++ code analysis
3  * Copyright (C) 2007-2024 Cppcheck team.
4  *
5  * This program is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation, either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program. If not, see <http://www.gnu.org/licenses/>.
17  */
18 
19 //---------------------------------------------------------------------------
20 #ifndef tokenizeH
21 #define tokenizeH
22 //---------------------------------------------------------------------------
23 
24 #include "config.h"
25 #include "tokenlist.h"
26 
27 #include <iosfwd>
28 #include <list>
29 #include <map>
30 #include <string>
31 #include <vector>
32 
33 class Settings;
34 class SymbolDatabase;
35 class TimerResults;
36 class Token;
37 class TemplateSimplifier;
38 class ErrorLogger;
39 struct Directive; // IWYU pragma: keep
40 enum class Severity;
41 
42 /// @addtogroup Core
43 /// @{
44 
45 /** @brief The main purpose is to tokenize the source code. It also has functions that simplify the token list */
47 
48  friend class SymbolDatabase;
49  friend class TemplateSimplifier;
50 
51  friend class TestSimplifyTemplate;
52  friend class TestSimplifyTypedef;
53  friend class TestTokenizer;
54 
55 public:
56  explicit Tokenizer(const Settings & settings, ErrorLogger &errorLogger);
57  ~Tokenizer();
58 
60  mTimerResults = tr;
61  }
62 
63  /** Is the code C? Forwards the query to the token list. Used for bailouts */
64  bool isC() const {
65  return list.isC();
66  }
67 
68  /** Is the code CPP? Forwards the query to the token list. Used for bailouts */
69  bool isCPP() const {
70  return list.isCPP();
71  }
72 
73  /**
74  * Check if inner scope ends with a call to a noreturn function
75  * \param endScopeToken The '}' token
76  * \param unknown set to true if it's unknown if the scope is noreturn
77  * \return true if scope ends with a function call that might be 'noreturn'
78  */
79  bool isScopeNoReturn(const Token *endScopeToken, bool *unknown = nullptr) const;
80 
81  bool simplifyTokens1(const std::string &configuration);
82 
83 private:
84  /** Set variable id */
85  void setVarId();
86  void setVarIdPass1();
87  void setVarIdPass2();
88 
89  /**
90  * Basic simplification of tokenlist
91  *
92  * @param FileName The filename to run; used to do
93  * markup checks.
94  *
95  * @return false if there is an error that requires aborting
96  * the checking of this file.
97  */
98  bool simplifyTokenList1(const char FileName[]);
99 
100  /**
100  * If --check-headers=no has been given, then remove unneeded code in headers.
102  * - All executable code.
103  * - Unused types/variables/etc
104  */
105  void simplifyHeadersAndUnusedTemplates();
106 
107  /**
108  * Remove extra "template" keywords that are not used by Cppcheck
109  */
110  void removeExtraTemplateKeywords();
111 
112 
113  /** Split up template right angle brackets.
114  * foo < bar < >> => foo < bar < > >
115  */
116  void splitTemplateRightAngleBrackets(bool check);
117 
118 public:
119  /**
120  * Calculates sizeof value for given type.
121  * @param type Token which will contain e.g. "int", "*", or string.
122  * @return sizeof for given type, or 0 if it can't be calculated.
123  */
124  nonneg int sizeOfType(const Token* type) const;
125  nonneg int sizeOfType(const std::string& type) const;
126 
127 private:
129 
130  /** Simplify assignment where rhs is a block : "x=({123;});" => "{x=123;}" */
132 
133  /** Insert array size where it isn't given */
134  void arraySize();
135  void arraySizeAfterValueFlow(); // cppcheck-suppress functionConst
136 
137  /** Simplify labels and 'case|default' syntaxes.
138  */
139  void simplifyLabelsCaseDefault();
140 
141  /** simplify case ranges (gcc extension)
142  */
143  void simplifyCaseRange();
144 
145  /** Remove macros in global scope */
146  void removeMacrosInGlobalScope();
147 
148  void addSemicolonAfterUnknownMacro();
149 
150  // Remove C99 and CPP11 _Pragma(str)
151  void removePragma();
152 
153  /** Remove undefined macro in class definition:
154  * class DLLEXPORT Fred { };
155  * class Fred FINAL : Base { };
156  */
157  void removeMacroInClassDef();
158 
159  /** Add parentheses for sizeof: sizeof x => sizeof(x) */
160  void sizeofAddParentheses();
161 
162  /**
163  * Simplify variable declarations (split up)
164  * \param only_k_r_fpar Only simplify K&R function parameters
165  */
166  void simplifyVarDecl(const bool only_k_r_fpar);
167  void simplifyVarDecl(Token * tokBegin, const Token * const tokEnd, const bool only_k_r_fpar); // cppcheck-suppress functionConst // has side effects
168 
169  /**
170  * Simplify variable initialization
171  * '; int *p(0);' => '; int *p = 0;'
172  */
173  void simplifyInitVar();
174  static Token* initVar(Token* tok);
175 
176  /**
177  * Simplify the location of "static" and "const" qualifiers in
178  * a variable declaration or definition.
177  * Example: "int static const a;" => "static const int a;"
180  * Example: "long long const static b;" => "static const long long b;"
181  */
182  void simplifyStaticConst();
183 
184  /**
185  * Simplify multiple assignments.
186  * Example: "a = b = c = 0;" => "a = 0; b = 0; c = 0;"
187  */
188  void simplifyVariableMultipleAssign();
189 
190  /**
191  * Simplify the 'C Alternative Tokens'
192  * Examples:
193  * "if(s and t)" => "if(s && t)"
193  * "while((r bitand s) and not t)" => "while((r & s) && !t)"
195  * "a and_eq b;" => "a &= b;"
196  */
197  bool simplifyCAlternativeTokens();
198 
199  /** Add braces to an if-block, for-block, etc.
200  * @return true if no syntax errors
201  */
202  bool simplifyAddBraces();
203 
204  /** Add braces to an if-block, for-block, etc.
205  * for command starting at token including else-block
206  * @return last token of command
207  * or input token in case of an error where no braces are added
208  * or NULL when syntaxError is called
209  */
210  Token * simplifyAddBracesToCommand(Token * tok);
211 
211  /** Add pair of braces to a single if-block, else-block, for-block, etc.
213  * for command starting at token
214  * @return last token of command
215  * or input token in case of an error where no braces are added
216  * or NULL when syntaxError is called
217  */
218  Token * simplifyAddBracesPair(Token *tok, bool commandWithCondition);
219 
220  // Convert "using ...;" to corresponding typedef
221  void simplifyUsingToTypedef();
222 
223  /**
224  * typedef A mytype;
225  * mytype c;
226  *
227  * Becomes:
228  * typedef A mytype;
229  * A c;
230  */
231  void simplifyTypedef();
232  void simplifyTypedefCpp();
233  /**
233  * Move typedef token to the left of the expression
235  */
236  void simplifyTypedefLHS();
237 
238  /**
239  */
240  static bool isMemberFunction(const Token *openParen);
241 
242  /**
243  */
244  bool simplifyUsing();
245  void simplifyUsingError(const Token* usingStart, const Token* usingEnd);
246 
247  /** Simplify useless C++ empty namespaces, like: 'namespace %name% { }'*/
248  void simplifyEmptyNamespaces();
249 
250  /** Simplify "if else" */
251  void elseif();
252 
253  /** Simplify C++17/C++20 if/switch/for initialization expression */
254  void simplifyIfSwitchForInit();
255 
256  /**
257  * Reduces "; ;" to ";", except in "( ; ; )"
258  */
259  void removeRedundantSemicolons();
260 
261  /** Struct simplification
262  * "struct S { } s;" => "struct S { }; S s;"
263  */
264 
266 
267  /**
268  * Remove redundant parentheses:
269  * - "((x))" => "(x)"
270  * - "(function())" => "function()"
271  * - "(delete x)" => "delete x"
272  * - "(delete [] x)" => "delete [] x"
273  * @return true if modifications to token-list are done.
274  * false if no modifications are done.
275  */
276  bool simplifyRedundantParentheses();
277 
278  /**
279  * Simplify functions like "void f(x) int x; {"
280  * into "void f(int x) {"
281  */
282  void simplifyFunctionParameters();
283 
284  /** Simplify function level try blocks:
285  * Convert "void f() try {} catch (int) {}"
286  * to "void f() { try {} catch (int) {} }"
287  */
289 
290  /**
291  * Simplify templates
292  */
293  void simplifyTemplates();
294 
295  void simplifyDoublePlusAndDoubleMinus();
296 
297  void simplifyRedundantConsecutiveBraces();
298 
299  void simplifyArrayAccessSyntax();
300 
301  void simplifyParameterVoid();
302 
303  void fillTypeSizes();
304 
305  void combineOperators();
306 
307  void combineStringAndCharLiterals();
308 
309  void concatenateNegativeNumberAndAnyPositive();
310 
311  void simplifyExternC();
312 
313  void simplifyRoundCurlyParentheses();
314 
315  void simplifyTypeIntrinsics();
316 
317  void simplifySQL();
318 
319  void checkForEnumsWithTypedef();
320 
321  void findComplicatedSyntaxErrorsInTemplates();
322 
323  /**
324  * Modify strings in the token list by replacing hex and oct
325  * values. E.g. "\x61" -> "a" and "\000" -> "\0"
326  * @param source The string to be modified, e.g. "\x61"
327  * @return Modified string, e.g. "a"
328  */
329  static std::string simplifyString(const std::string &source);
330 
331 public:
332  /**
333  * is token pointing at function head?
334  * @param tok A '(' or ')' token in a possible function head
335  * @param endsWith string after function head
336  * @return token matching with endsWith if syntax seems to be a function head else nullptr
337  */
338  static const Token * isFunctionHead(const Token *tok, const std::string &endsWith);
339 
340  bool hasIfdef(const Token *start, const Token *end) const;
341 
342  bool isPacked(const Token * bodyStart) const;
343 
344 private:
345 
346  /** Simplify pointer to standard type (C only) */
347  void simplifyPointerToStandardType();
348 
349  /** Simplify function pointers */
350  void simplifyFunctionPointers();
351 
352  /**
353  * Send error message to error logger about internal bug.
354  * @param tok the token that this bug concerns.
355  */
356  NORETURN void cppcheckError(const Token *tok) const;
357 
358  /**
359  * Setup links for tokens so that one can call Token::link().
360  */
361  void createLinks();
362 
363  /**
364  * Setup links between < and >.
365  */
366  void createLinks2();
367 
368  /**
369  * Set isCast() for C++ casts
370  */
371  void markCppCasts();
372 
373 public:
374 
375  /** Syntax error */
376  NORETURN void syntaxError(const Token *tok, const std::string &code = emptyString) const;
377 
378  /** Syntax error. Unmatched character. */
379  NORETURN void unmatchedToken(const Token *tok) const;
380 
381  /** Syntax error. C++ code in C file. */
382  NORETURN void syntaxErrorC(const Token *tok, const std::string &what) const;
383 
384  /** Warn about unknown macro(s), configuration is recommended */
385  NORETURN void unknownMacroError(const Token *tok1) const;
386 
387  void unhandledCharLiteral(const Token *tok, const std::string& msg) const;
388 
389 private:
390 
391  /** Report that there is an unhandled "class x y {" code */
392  void unhandled_macro_class_x_y(const Token *tok) const;
393 
394  /** Check configuration (unknown macros etc) */
395  void checkConfiguration() const;
396  void macroWithSemicolonError(const Token *tok, const std::string &macroName) const;
397 
398  /**
399  * Is there C++ code in C file?
400  */
401  void validateC() const;
402 
403  /**
404  * assert that tokens are ok - used during debugging for example
405  * to catch problems in simplifyTokenList1/2.
406  */
407  void validate() const;
408 
409  /** Detect unknown macros and throw unknownMacro */
410  void reportUnknownMacros() const;
411 
412  /** Detect garbage code and call syntaxError() if found. */
413  void findGarbageCode() const;
414 
415  /** Detect garbage expression */
416  static bool isGarbageExpr(const Token *start, const Token *end, bool allowSemicolon);
417 
418  /**
419  * Remove __declspec()
420  */
422 
423  /**
424  * Remove calling convention
425  */
427 
428  /**
429  * Remove \__attribute\__ ((?))
430  */
432 
432  /** Get function token for an attribute */
434  Token* getAttributeFuncTok(Token* tok, bool gccattr) const;
435 
436  /**
437  * Remove \__cppcheck\__ ((?))
438  */
440 
441  /** Simplify c++20 spaceship operator */
443 
444  /**
445  * Remove keywords "volatile", "inline", "register", and "restrict"
446  */
448 
449  /**
450  * Remove __asm
451  */
452  void simplifyAsm();
453 
454  /**
455  * asm heuristics, Put ^{} statements in asm()
456  */
457  void simplifyAsm2();
458 
459  /**
460  * Simplify \@&hellip; (compiler extension)
461  */
462  void simplifyAt();
463 
464  /**
465  * Simplify bitfields - the field width is removed as we don't use it.
466  */
468 
469  /**
470  * Remove unnecessary member qualification
471  */
473 
474  /**
475  * Add std:: in front of std classes, when using namespace std; was given
476  */
478 
479  /**
480  * Convert Microsoft memory functions
481  * CopyMemory(dst, src, len) -> memcpy(dst, src, len)
482  * FillMemory(dst, len, val) -> memset(dst, val, len)
483  * MoveMemory(dst, src, len) -> memmove(dst, src, len)
484  * ZeroMemory(dst, len) -> memset(dst, 0, len)
485  */
487 
488  /**
489  * Convert Microsoft string functions
490  * _tcscpy -> strcpy
491  */
493 
494  /**
495  * Remove Borland code
496  */
498 
499  /**
500  * Collapse operator name tokens into single token
501  * operator = => operator=
502  */
504 
505  /** simplify overloaded operators: 'obj(123)' => 'obj . operator() ( 123 )' */
507 
508  /**
509  * Remove [[attribute]] (C++11, C23) from TokenList
510  */
512 
513  /**
514  * Convert namespace aliases
515  */
517 
518  /**
519  * Convert C++17 style nested namespace to older style
520  */
522 
523  /**
524  * Simplify coroutines - just put parentheses around arguments for
525  * co_* keywords so they can be handled like function calls in data
526  * flow.
527  */
529 
530  /**
531  * Prepare ternary operators with parentheses so that the AST can be created
532  * */
534 
535  /**
536  * report error message
537  */
538  void reportError(const Token* tok, const Severity severity, const std::string& id, const std::string& msg, bool inconclusive = false) const;
539  void reportError(const std::list<const Token*>& callstack, Severity severity, const std::string& id, const std::string& msg, bool inconclusive = false) const;
540 
541  bool duplicateTypedef(Token *&tokPtr, const Token *name, const Token *typeDef) const;
542 
543  void unsupportedTypedef(const Token *tok) const;
544 
545  static void setVarIdClassFunction(const std::string &classname,
546  Token * const startToken,
547  const Token * const endToken,
548  const std::map<std::string, nonneg int> &varlist,
549  std::map<nonneg int, std::map<std::string, nonneg int>>& structMembers,
550  nonneg int &varId_);
551 
552  /**
553  * Output list of unknown types.
554  */
555  void printUnknownTypes() const;
556 
557  /** Find end of SQL (or PL/SQL) block */
558  static const Token *findSQLBlockEnd(const Token *tokSQLStart);
559 
560  static bool operatorEnd(const Token * tok);
561 
562 public:
564  return mSymbolDatabase;
565  }
567 
568  /** print --debug output if debug flags match the simplification:
569  * 0=unknown/both simplifications
570  * 1=1st simplifications
571  * 2=2nd simplifications
572  */
573  void printDebugOutput(int simplification) const;
574 
575  void dump(std::ostream &out) const;
576 
577  Token *deleteInvalidTypedef(Token *typeDef);
578 
579  /**
580  * Get variable count.
581  * @return number of variables, i.e. the current value of the mVarId counter
582  */
583  nonneg int varIdCount() const {
584  return mVarId;
585  }
586 
587  /**
588  * Token list: stores all tokens.
589  */
590  // tokens() is a convenience wrapper around the TokenList: it returns list.front()
591  const Token* tokens() const {
592  return list.front();
593  }
595 
597  return list.front();
598  }
599 
600  /**
601  * Helper function to check whether number is one (1 or 0.1E+1 or 1E+0) or not?
602  * @param s the string to check
602  * @return true in case it is one and false otherwise.
604  */
605  static bool isOneNumber(const std::string &s);
606 
607  /**
608  * Helper function to check for start of function execution scope.
609  * Do not use this in checks. Use the symbol database.
610  * @param tok pointer to end parentheses of parameter list
611  * @return pointer to start brace of function scope or nullptr if not start.
612  */
613  static const Token * startOfExecutableScope(const Token * tok);
614 
614  const Settings &getSettings() const {
615  return mSettings; // read-only access to the Settings reference member
616  }
618 
619  void calculateScopes();
620 
621  /** Disable copy constructor */
622  Tokenizer(const Tokenizer &) = delete;
623 
624  /** Disable assignment operator */
625  Tokenizer &operator=(const Tokenizer &) = delete;
626 
627  void setDirectives(std::list<Directive> directives);
628 
629 private:
630  const Token *processFunc(const Token *tok2, bool inOperator) const;
631  Token *processFunc(Token *tok2, bool inOperator);
632 
633  /**
634  * Get new variable id.
635  * @return new variable id
636  */
637  nonneg int newVarId() {
638  return ++mVarId;
639  }
640 
641  /** Set pod types */
642  void setPodTypes();
643 
644  /** settings */
646 
647  /** errorlogger */
649 
650  /** Symbol database that all checks etc can use */
651  SymbolDatabase* mSymbolDatabase{};
652 
654 
655  /** E.g. "A" for code where "#ifdef A" is true. This is used to
656  print additional information in error situations. */
657  std::string mConfiguration;
658 
659  /** sizeof information for known types */
660  std::map<std::string, int> mTypeSize;
661 
662  struct TypedefInfo {
663  std::string name;
664  std::string filename;
666  int column;
667  bool used;
668  };
669  std::vector<TypedefInfo> mTypedefInfo;
670 
671  std::list<Directive> mDirectives;
672 
673  /** variable count */
674  nonneg int mVarId{};
675 
676  /** unnamed count "Unnamed0", "Unnamed1", "Unnamed2", ... */
677  nonneg int mUnnamedCount{};
678 
679  /**
680  * TimerResults
681  */
682  TimerResults* mTimerResults{};
683 };
684 
685 /// @}
686 
687 //---------------------------------------------------------------------------
688 #endif // tokenizeH
This is an interface, which the class responsible of error logging should implement.
Definition: errorlogger.h:214
This is just a container for general settings so that we don't need to pass individual values to func...
Definition: settings.h:95
Simplify templates from the preprocessed and partially simplified code.
const Token * front() const
get first token of list
Definition: tokenlist.h:119
The token list that the TokenList generates is a linked-list of this class.
Definition: token.h:150
The main purpose is to tokenize the source code.
Definition: tokenize.h:46
void simplifyOverloadedOperators()
simplify overloaded operators: 'obj(123)' => 'obj . operator() ( 123 )'
void simplifyMicrosoftStringFunctions()
Convert Microsoft string functions _tcscpy -> strcpy.
std::string mConfiguration
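E.g. "A" for code where "#ifdef A" is true. This is used to print additional information in error situations.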
Definition: tokenize.h:657
void simplifyBitfields()
Simplify bitfields - the field width is removed as we don't use it.
nonneg int newVarId()
Get new variable id.
Definition: tokenize.h:637
bool isPacked(const Token *bodyStart) const
void reportError(const std::list< const Token * > &callstack, Severity severity, const std::string &id, const std::string &msg, bool inconclusive=false) const
const Settings & getSettings() const
Definition: tokenize.h:615
const Token * tokens() const
Definition: tokenize.h:592
void simplifyAttribute()
Remove __attribute__ ((?))
static const Token * findSQLBlockEnd(const Token *tokSQLStart)
Find end of SQL (or PL/SQL) block.
Token * tokens()
Definition: tokenize.h:596
void simplifyNamespaceAliases()
Convert namespace aliases.
static bool isGarbageExpr(const Token *start, const Token *end, bool allowSemicolon)
Detect garbage expression.
nonneg int varIdCount() const
Get variable count.
Definition: tokenize.h:583
void removeUnnecessaryQualification()
Remove unnecessary member qualification.
void createSymbolDatabase()
void prepareTernaryOpForAST()
Prepare ternary operators with parentheses so that the AST can be created.
static std::string simplifyString(const std::string &source)
Modify strings in the token list by replacing hex and oct values.
void simplifyFunctionTryCatch()
Simplify function level try blocks: Convert "void f() try {} catch (int) {}" to "void f() { try {} ca...
std::list< Directive > mDirectives
Definition: tokenize.h:671
ErrorLogger & mErrorLogger
errorlogger
Definition: tokenize.h:648
void simplifyCppcheckAttribute()
Remove __cppcheck__ ((?))
Token * getAttributeFuncTok(Token *tok, bool gccattr) const
Get function token for a attribute.
void findGarbageCode() const
Detect garbage code and call syntaxError() if found.
Tokenizer & operator=(const Tokenizer &)=delete
Disable assignment operator.
void simplifyMicrosoftMemoryFunctions()
Convert Microsoft memory functions CopyMemory(dst, src, len) -> memcpy(dst, src, len) FillMemory(dst,...
void setDirectives(std::list< Directive > directives)
void simplifyAssignmentBlock()
Simplify assignment where rhs is a block : "x=({123;});" => "{x=123;}".
std::map< std::string, int > mTypeSize
sizeof information for known types
Definition: tokenize.h:660
TokenList list
Token list: stores all tokens.
Definition: tokenize.h:590
void simplifyNamespaceStd()
Add std:: in front of std classes, when using namespace std; was given.
void simplifyKeyword()
Remove keywords "volatile", "inline", "register", and "restrict".
bool isC() const
Is the code C.
Definition: tokenize.h:64
const SymbolDatabase * getSymbolDatabase() const
Definition: tokenize.h:563
const Settings & mSettings
settings
Definition: tokenize.h:645
static bool operatorEnd(const Token *tok)
void simplifyDeclspec()
Remove __declspec()
void setTimerResults(TimerResults *tr)
Definition: tokenize.h:59
void simplifySpaceshipOperator()
Simplify c++20 spaceship operator.
void printUnknownTypes() const
Output list of unknown types.
void simplifyBorland()
Remove Borland code.
TemplateSimplifier *const mTemplateSimplifier
Definition: tokenize.h:653
void simplifyCallingConvention()
Remove calling convention.
void setPodTypes()
Set pod types.
void reportError(const Token *tok, const Severity severity, const std::string &id, const std::string &msg, bool inconclusive=false) const
report error message
void simplifyAsm()
Remove __asm.
void simplifyDebug()
void simplifyAt()
Simplify @… (compiler extension)
bool isCPP() const
Is the code CPP.
Definition: tokenize.h:69
bool hasIfdef(const Token *start, const Token *end) const
void simplifyStructDecl()
Struct simplification "struct S { } s;" => "struct S { }; S s;".
Tokenizer(const Tokenizer &)=delete
Disable copy constructor.
void simplifyAsm2()
asm heuristics, Put ^{} statements in asm()
void simplifyOperatorName()
Collapse operator name tokens into single token operator = => operator=.
std::vector< TypedefInfo > mTypedefInfo
Definition: tokenize.h:669
void simplifyCoroutines()
Simplify coroutines - just put parentheses around arguments for co_* keywords so they can be handled ...
void simplifyNestedNamespace()
Convert C++17 style nested namespace to older style.
void simplifyCPPAttribute()
Remove [[attribute]] (C++11, C23) from TokenList.
static const std::string emptyString
Definition: config.h:127
#define CPPCHECKLIB
Definition: config.h:35
#define nonneg
Definition: config.h:138
#define NORETURN
Definition: config.h:80
Severity
enum class for severity.
Definition: errortypes.h:63
A preprocessor directive Each preprocessor directive (#include, #define, #undef, #if,...
Definition: preprocessor.h:49
std::string filename
Definition: tokenize.h:664
std::string name
Definition: tokenize.h:663
bool endsWith(const std::string &str, char c)
Definition: utils.h:110