A final note about using RegEx to check the syntax of ASM instructions.
The C implementations I have seen appear buggy, despite being cute and small.
The C++ Boost library provides a robust RegEx implementation. It is written in C++, but it can be called from C through a small wrapper function declared extern "C", as shown below.
I tested the same RegEx I have used previously and it worked as expected.
1- C source
#include "common.h"
/*
 * Pattern describing the beginning of a crc32 instruction up to and including
 * the comma that follows the destination operand:
 *   (?i)      case-insensitive matching (inline modifier)
 *   ^crc32    the mnemonic, followed by at least one whitespace character
 *   register  one of rax/eax, rbx/ebx, rcx/ecx, rdx/edx, rsi/esi, rdi/edi,
 *             or r8..r15 with an optional 'd' suffix
 *   \s*,\s*   a comma with optional whitespace on either side
 */
const char regexp[] = "(?i)(^crc32\\s+((((r|e)a|(r|e)b|(r|e)c|(r|e)d)x)|(((r|e)s|(r|e)d)i)|((r8|r9|r10|r11|r12|r13|r14|r15)d?)))\\s*,\\s*";

/*
 * Sample inputs handed to dotest(). The trailing notes give the result shown
 * in the sample output of this write-up.
 */
const char instructs[NUMBER_OF_STRING][MAX_STRING_SIZE] = {
    "crc32 ecx,",     /* matches */
    "CRC32 esi, ",    /* matches */
    "crc32 r10d , ",  /* matches */
    "cRC32 r15 , ",   /* matches */
    "Crr32 rdi,",     /* does not match: misspelled mnemonic */
    "CrC32 rdi,",     /* matches */
    "crc32 bx,",      /* does not match: bx is not in the register list */
    "crc32 ebx , "    /* matches */
};
/*
 * Entry point: runs the sample instruction strings through dotest() using the
 * crc32 pattern.
 *
 * The status returned by dotest() is turned into the process exit code
 * (0 when dotest() returns 0, 1 otherwise) instead of being discarded.
 */
int main(void)
{
    return dotest(instructs, regexp) == 0 ? 0 : 1;
}
2- Header
#pragma once
/* Number of strings in the array handed to dotest(). */
#define NUMBER_OF_STRING 8
/* Size in bytes of each string slot in that array. */
#define MAX_STRING_SIZE 40
/* When compiled as C++, give dotest() C linkage so the C source can call it. */
#ifdef __cplusplus
extern "C" {
#endif
/*
 * Tests each of the NUMBER_OF_STRING entries of strArray against the regular
 * expression in pattern and prints, per entry, whether it matches.
 * Returns 0 once every entry has been tested.
 */
int dotest(const char strArray[][MAX_STRING_SIZE], const char* pattern);
#ifdef __cplusplus
}
#endif
3- C++ file
#include <boost/regex.hpp>
#include <string>
#include <iostream>
#include "common.h"
using namespace std;
/**
 * Tests every entry of strArray against a regular expression and prints the
 * outcome of each test on standard output.
 *
 * An entry "matches" only when the whole string matches the pattern
 * (boost::regex_match), not when the pattern merely occurs inside it.
 *
 * This function is declared extern "C" in common.h and is called from C code,
 * so no C++ exception may leave it: failures are reported through the return
 * value instead.
 *
 * @param strArray  NUMBER_OF_STRING NUL-terminated strings, each stored in a
 *                  MAX_STRING_SIZE-byte slot.
 * @param pattern   NUL-terminated regular expression in Boost.Regex syntax.
 * @return 0 once every entry has been tested; -1 if an argument is null, the
 *         pattern cannot be compiled, or matching fails with an exception.
 */
int dotest(const char strArray[][MAX_STRING_SIZE], const char* pattern)
{
    // Constructing a std::string or boost::regex from a null pointer is
    // undefined behaviour, so reject null arguments up front.
    if (!strArray || !pattern)
    {
        std::cerr << "dotest: null argument" << std::endl;
        return -1;
    }

    try
    {
        // Compile the expression once; throws boost::regex_error when the
        // pattern is malformed.
        const boost::regex pat(pattern);
        boost::smatch matches;

        for (int i = 0; i < NUMBER_OF_STRING; ++i)
        {
            const std::string str(strArray[i]);
            if (boost::regex_match(str, matches, pat))
                std::cout << matches[0] << "\t\t matches" << std::endl;
            else
                std::cout << str << "\t\t does not match." << std::endl;
        }
    }
    catch (const boost::regex_error& err)
    {
        std::cerr << "dotest: invalid regular expression: " << err.what() << std::endl;
        return -1;
    }
    catch (...)
    {
        // Anything else (e.g. matching complexity exceeded, out of memory)
        // must also be stopped here rather than unwinding into the C caller.
        std::cerr << "dotest: unexpected failure while matching" << std::endl;
        return -1;
    }

    return 0;
}
Output:
crc32 ecx, matches
CRC32 esi, matches
crc32 r10d , matches
cRC32 r15 , matches
Crr32 rdi, does not match.
CrC32 rdi, matches
crc32 bx, does not match.
crc32 ebx , matches
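It is also possible to use the C++ standard library's std::regex instead of the Boost library regex, but its default (ECMAScript) grammar does not support the inline (?i) case-insensitivity modifier used in the pattern above. Remove (?i) from the pattern and pass the std::regex::icase flag when constructing the regular expression instead. Otherwise it works fine too.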