From 29cdba0d70d5a4e0453c451981a104668a427abd Mon Sep 17 00:00:00 2001 From: Uwe Bonnes Date: Fri, 27 Oct 2017 16:15:46 +0200 Subject: [PATCH] SWO: Some explanations and a test program. --- UsingSWO | 254 +++++++++++++++++++++ scripts/swolisten.c | 544 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 798 insertions(+) create mode 100644 UsingSWO create mode 100644 scripts/swolisten.c diff --git a/UsingSWO b/UsingSWO new file mode 100644 index 00000000..e582df53 --- /dev/null +++ b/UsingSWO @@ -0,0 +1,254 @@ +SWO is a datastream that comes out of a single pin when the debug interface +is in SWD mode. It can be encoded either using NRZ (UART) or RZ (Manchester) +formats. The pin is a dedicated one that would be used for TDO when the +debug interface is in JTAG mode. On the STM32 it's port PB3. + +When in NRZ mode the SWO data rate that comes out of the chip _must_ match +the rate that the debugger expects. By default on BMP the baudrate is +2.25Mbps but that can be changed as an optional parameter to the monitor +traceswo command, like this; + +monitor traceswo 115200 + +....would set the swo output at the low speed of 115kbps. + +We are constrained on maximum input speed by both the capabilities of the +BMP STM32F103 USART and the ability to get the packets back out over the USB +link. The UART baudrate is set by b=(72x10^6)/(16*d)...so for d=1 that means +a maximum speed of 4.5Mbps. For continuous streaming that turns out to be +_too_ fast for the USB link, so the next available option is the 2.25Mbps +that we use. ....you can safely use the 4.5Mbps setting if your debug data +is bursty, or if you're using a different CPU to the STM32F103 as your BMP +host, but you potentially run the risk of losing packets if you have long +runs of sending which the usb cannot flush in time (there's a 12K buffer, so +it is a pretty long run before it becomes a problem). + +Note that the baudrate equation means there are only certain speeds +available. 
The highest half dozen are; + +1 4.50 Mbps +2 2.25 Mbps +3 1.50 Mbps +4 1.125 Mbps +5 0.900 Mbps +6 0.750 Mbps + +...the USART will cope with some timing slip, but it's advisable to stay as +close to these values as you can. As the speed comes down the spread between +each valid value narrows, so mis-timing is less of an issue. The 'monitor traceswo +<baudrate>' command will automatically find the closest divisor to the value you +set for the speed, so be aware the error could be significant. + +Depending on what you're using to wake up SWO on the target side, you may +need code to get it into the correct mode and emitting data. You can do that +via gdb direct memory accesses, or from program code. + +An example for an STM32F103 for the UART (NRZ) data format that we use; + + /* STM32 specific configuration to enable the TRACESWO IO pin */ + RCC->APB2ENR |= RCC_APB2ENR_AFIOEN; + AFIO->MAPR |= (2 << 24); // Disable JTAG to release TRACESWO + DBGMCU->CR |= DBGMCU_CR_TRACE_IOEN; // Enable IO trace pins + + *((volatile unsigned *)(0xE0040010)) = 31; // Output bits at 72000000/(31+1)=2.25MHz. 
+ *((volatile unsigned *)(0xE00400F0)) = 2; // Use Async mode (1 for RZ/Manchester) + *((volatile unsigned *)(0xE0040304)) = 0; // Disable formatter + + /* Configure instrumentation trace macroblock */ + ITM->LAR = 0xC5ACCE55; + ITM->TCR = 0x00010005; + ITM->TER = 0xFFFFFFFF; // Enable all stimulus ports + +Code for the STM32L476 might look like: +#define BAUDRATE 115200 + DBGMCU->CR |= DBGMCU_CR_TRACE_IOEN; /* Enable IO pins for Async trace */ + uint32_t divisor, clk_frequency; + clk_frequency = NutGetCpuClock(); + divisor = clk_frequency / BAUDRATE; + divisor--; + TPI->CSPSR = 1; /* port size = 1 bit */ + TPI->ACPR = divisor; + TPI->SPPR = 2; /* Use Async mode pin protocol */ + TPI->FFCR = 0x00; /* Bypass the TPIU formatter and send output directly */ + +/* Configure Trace Port Interface Unit */ + CoreDebug->DEMCR |= CoreDebug_DEMCR_TRCENA_Msk; // Enable access to registers + DWT->CTRL = 0x400003FE; // DWT needs to provide sync for ITM + ITM->LAR = 0xC5ACCE55; // Allow access to the Control Register + ITM->TPR = 0x0000000F; // Trace access privilege from user level code, please + ITM->TCR = 0x0001000D; // ITM_TCR_TraceBusID_Msk | ITM_TCR_DWTENA_Msk | ITM_TCR_SYNCENA_Msk | ITM_TCR_ITMENA_Msk + ITM->TER = 1; // Enable only stimulus port 0 (bit 0), the port ITM_SendChar writes to + + while(1) { + for (uint32_t i = 'A'; i <= 'Z'; i++) { + ITM_SendChar(i); + NutSleep(1); + } + } + +If you're using RZ mode (e.g. on a genuine BMP) then you will need the trace +output speed to be quite a lot lower...in the order of 200kHz by means of +changing the divisor to something like 359. That's because the STM32F103 +doesn't have a dedicated RZ decoder so it all has to be done in +software. The advantage of RZ is that the probe can adapt to the speed of +the target, so you don't have to set the speed on the probe in the monitor +traceswo command, and it will be tolerant of different speeds. + +The SWO data appears on USB Interface 5, Endpoint 5. 
+ +SWOListen +========= +A program swolisten.c is found in ./scripts which will listen to this +endpoint, decode the datastream, and output it to a set of unix fifos which +can then be used as the input to other programs (e.g. cat, or something more +sophisticated like gnuplot, octave or whatever). This program doesn't care +if the data originates from a RZ or NRZ port, or at what speed. + +Note that swolisten can be used with either BMP firmware, or with a +conventional TTL serial dongle. See at the bottom of this file for +information on how to use a dongle. + +The command line to build the swolisten tool is; + +gcc -I /usr/local/include/libusb-1.0 -L /usr/local/lib -lusb-1.0 swolisten.c -o swolisten + +For Opensuse: +gcc -I /usr/include/libusb-1.0 -lusb-1.0 swolisten.c -o swolisten -std=gnu99 -g -Og + +...you will obviously need to change the paths to your libusb files. + +Attach the BMP to your PC: +Start gdb: "arm-none-eabi-gdb" +Choose bmp as target, like: + "target extended /dev/ttyACM0(*)" +Start SWO output: "mon traceswo" +If async SWO is used, give the baudrate your device sends +out as argument. 2.25 MBaud is the default, for the STM32L476 example above +the command would be: "mon traceswo 115200(*)". +Scan the SWD "mon swdp_scan" +Attach to the device: "attach 1" +Start the program: "r". +(*) Your mileage may vary +Now start swolisten, giving the directory for the fifos, e.g. "./swolisten -b swo/". + +By default the tool will create fifos for the first 32 channels. With "-b swo/" they are placed in a +directory swo (which you will need to create) as follows; + +>ls swo/ +chan00 chan02 chan04 chan06 chan08 chan0A chan0C chan0E chan10 chan12 chan14 +chan16 chan18 chan1A chan1C chan1E chan01 chan03 chan05 chan07 chan09 chan0B +chan0D chan0F chan11 chan13 chan15 chan17 chan19 chan1B chan1D chan1F + +>cat swo/chan00 +<> + +With the F103 and L476 examples above, an endless stream of +"ABCDEFGHIJKLMNOPQRSTUVWXYZ" should be seen. During reset of the target +device, no output will appear, but with release of reset output restarts. 
+ +Information about command line options can be found with the -h option. +swolisten is specifically designed to be 'hardy' to probe and target +disconnects and restarts (y'know, like you get in the real world). The +intention is to give you streams whenever it can get them. It does _not_ +require gdb to be running. For the time being traceswo is not turned on by +default in the BMP to avoid possible interactions and making the overall +thing less reliable, so you do need gdb to send the initial 'monitor +traceswo' to the probe, but beyond that there's no requirement for gdb to be +present. + +Reliability +=========== + +A whole chunk of work has gone into making sure the dataflow over the SWO +link is reliable. The TL;DR is that the link _is_ reliable. There are +factors outside of our control (e.g. the USB bus you connect to) that could +potentially break the reliability but there's not too much we can do about +that since the SWO link is unidirectional (no opportunity for +re-transmits). The following section provides evidence for the claim that +the link is good; + +A test 'mule' sends data flat out to the link at the maximum data rate of +2.25Mbps using a loop like the one below; + +while (1) +{ + for (uint32_t r=0; r<26; r++) + { + for (uint32_t g=0; g<31; g++) + { + ITM_SendChar('A'+r); + } + ITM_SendChar('\n'); + } +} + +100MB of data (more than 200MB of actual SWO packets, due to the encoding) was sent from the mule to the BMP where the +output from swolisten chan00 was cat'ted into a file; + +>cat swo/chan00 > o + +....this process was interrupted once the file had grown to 100MB. 
The first +and last lines were removed from it (these represent previously buffered +data and an incomplete packet at the point where the capture was +interrupted) and the resulting file analysed for consistency; + +> sort o | uniq -c + +The output was; + +126462 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA +126462 BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB +126462 CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC +126462 DDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD +126461 EEEEEEEEEEEEEEEEEEEEEEEEEEEEEEE +126461 FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF +126461 GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG +126461 HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH +126461 IIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +126461 JJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJ +126461 KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK +126461 LLLLLLLLLLLLLLLLLLLLLLLLLLLLLLL +126461 MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM +126461 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +126461 OOOOOOOOOOOOOOOOOOOOOOOOOOOOOOO +126461 PPPPPPPPPPPPPPPPPPPPPPPPPPPPPPP +126461 QQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQ +126461 RRRRRRRRRRRRRRRRRRRRRRRRRRRRRRR +126461 SSSSSSSSSSSSSSSSSSSSSSSSSSSSSSS +126461 TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT +126461 UUUUUUUUUUUUUUUUUUUUUUUUUUUUUUU +126461 VVVVVVVVVVVVVVVVVVVVVVVVVVVVVVV +126461 WWWWWWWWWWWWWWWWWWWWWWWWWWWWWWW +126461 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +126461 YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY +126461 ZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZ + +(On inspection, the last line of recorded data was indeed a 'D' line). + +Swolisten, using a TTL Serial Dongle +==================================== + +The NRZ data that comes out of the SWO is just UART formatted, but in a +frame. swolisten has been extended to accommodate TTL Serial Dongles that +can pick this up. Success has been had with CP2102 dongles at up to 921600 +baud. + +To use this mode just connect SWO to the RX pin of your dongle, and start +swolisten with parameters representing the speed and port. 
An example; + +>./swolisten -p /dev/cu.SLAB_USBtoUART -v -b swo/ -s 921600 + +Any individual dongle will only support certain baudrates (Generally +multiples of 115200) so you may have to experiment to find the best +supported ones. For the CP2102 dongle 1.3824Mbps wasn't supported and +1.8432Mbps returned corrupted data. + +Please email dave@marples.net with information about dongles you find work +well and at what speed. + +Further information +=================== +SWO is a wide field. Read e.g. the blogs around SWD on +http://shadetail.com/blog/swo-starting-the-steroids/ +An open source program suite for SWO under active development is +https://github.com/mubes/orbuculum \ No newline at end of file diff --git a/scripts/swolisten.c b/scripts/swolisten.c new file mode 100644 index 00000000..1ce141aa --- /dev/null +++ b/scripts/swolisten.c @@ -0,0 +1,544 @@ +/* + * SWO Splitter for Blackmagic Probe and others. + * ============================================= + * + * This file is part of the Black Magic Debug project. + * + * Copyright (C) 2017 Dave Marples + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define VID (0x1d50) +#define PID (0x6018) +#define INTERFACE (5) +#define ENDPOINT (0x85) + +#define TRANSFER_SIZE (64) +#define NUM_FIFOS 32 +#define MAX_FIFOS 128 + +#define CHANNELNAME "chan" + +#define BOOL char +#define FALSE (0) +#define TRUE (!FALSE) + +// Record for options, either defaults or from command line +struct +{ + BOOL verbose; + BOOL dump; + int nChannels; + char *chanPath; + char *port; + int speed; +} options = {.nChannels=NUM_FIFOS, .chanPath="", .speed=115200}; + +// Runtime state +struct +{ + int fifo[MAX_FIFOS]; +} _r; + +// ==================================================================================================== +// ==================================================================================================== +// ==================================================================================================== +// Internals +// ==================================================================================================== +// ==================================================================================================== +// ==================================================================================================== +static BOOL _runFifo(int portNo, int listenHandle, char *fifoName) + +{ + int pid,fifo; + int readDataLen, writeDataLen; + + if (mkfifo(fifoName,0666)<0) + { + return FALSE; + } + + pid=fork(); + + if (pid==0) + { + char rxdata[TRANSFER_SIZE]; + int fifo; + + /* Don't kill this sub-process when any reader or writer evaporates */ + signal(SIGPIPE, SIG_IGN); + + while (1) + { + /* This is the child */ + fifo=open(fifoName,O_WRONLY); + + while (1) + { + readDataLen=read(listenHandle,rxdata,TRANSFER_SIZE); + if (readDataLen<=0) + { + exit(0); + } + + writeDataLen=write(fifo,rxdata,readDataLen); + if (writeDataLen<=0) + { + 
break; + } + } + close(fifo); + } + } + else if (pid<0) + { + /* The fork failed */ + return FALSE; + } + return TRUE; +} +// ==================================================================================================== +static BOOL _makeFifoTasks(void) + +/* Create each sub-process that will handle a port */ + +{ + char fifoName[PATH_MAX]; + + int f[2]; + + for (int t=0; t0) + { + close(_r.fifo[t]); + sprintf(fifoName,"%s%s%02X",options.chanPath,CHANNELNAME,t); + unlink(fifoName); + remainingProcesses++; + } + } + + while (remainingProcesses--) + { + waitpid(-1,&statloc,0); + } +} +// ==================================================================================================== +// ==================================================================================================== +// ==================================================================================================== +// Handlers for each message type +// ==================================================================================================== +// ==================================================================================================== +// ==================================================================================================== +void _handleSWIT(uint8_t addr, uint8_t length, uint8_t *d) + +{ + if (addr ",*c,_protoNames[p]); +#endif + + switch (p) + { + // ----------------------------------------------------- + case ITM_IDLE: + if (*c==0b01110000) + { + /* This is an overflow packet */ + if (options.verbose) + fprintf(stderr,"Overflow!\n"); + break; + } + // ********** + if (*c==0) + { + /* This is a sync packet - expect to see 4 more 0's followed by 0x80 */ + targetCount=4; + currentCount=0; + p=ITM_SYNCING; + break; + } + // ********** + if (!(*c&0x0F)) + { + currentCount=1; + /* This is a timestamp packet */ + rxPacket[0]=*c; + + if (!(*c&0x80)) + { + /* A one byte output */ + _handleTS(currentCount,rxPacket); + } + else + { + p=ITM_TS; + } + break; 
+ } + // ********** + if ((*c&0x0F) == 0x04) + { + /* This is a reserved packet */ + break; + } + // ********** + if (!(*c&0x04)) + { + /* This is a SWIT packet */ + if ((targetCount=*c&0x03)==3) + targetCount=4; + srcAddr=(*c&0xF8)>>3; + currentCount=0; + p=ITM_SWIT; + break; + } + // ********** + if (options.verbose) + fprintf(stderr,"Illegal packet start in IDLE state\n"); + break; + // ----------------------------------------------------- + case ITM_SWIT: + rxPacket[currentCount]=*c; + currentCount++; + + if (currentCount>=targetCount) + { + p=ITM_IDLE; + _handleSWIT(srcAddr, targetCount, rxPacket); + } + break; + // ----------------------------------------------------- + case ITM_TS: + rxPacket[currentCount++]=*c; + if (!(*c&0x80)) + { + /* We are done */ + _handleTS(currentCount,rxPacket); + } + else + { + if (currentCount>4) + { + /* Something went badly wrong */ + p=ITM_IDLE; + } + break; + } + + // ----------------------------------------------------- + case ITM_SYNCING: + if ((*c==0) && (currentCount

\n",progName); + printf(" b: for channels\n"); + printf(" h: This help\n"); + printf(" d: Dump received data without further processing\n"); + printf(" n: of channels to populate\n"); + printf(" p: to use\n"); + printf(" s: to use\n"); + printf(" v: Verbose mode\n"); +} +// ==================================================================================================== +int _processOptions(int argc, char *argv[]) + +{ + int c; + while ((c = getopt (argc, argv, "vdn:b:hp:s:")) != -1) + switch (c) + { + case 'v': + options.verbose = 1; + break; + case 'd': + options.dump = 1; + break; + case 'p': + options.port=optarg; + break; + case 's': + options.speed=atoi(optarg); + break; + case 'h': + _printHelp(argv[0]); + return FALSE; + case 'n': + options.nChannels=atoi(optarg); + if ((options.nChannels<1) || (options.nChannels>MAX_FIFOS)) + { + fprintf(stderr,"Number of channels out of range (1..%d)\n",MAX_FIFOS); + return FALSE; + } + break; + case 'b': + options.chanPath = optarg; + break; + case '?': + if (optopt == 'b') + fprintf (stderr, "Option '%c' requires an argument.\n", optopt); + else if (!isprint (optopt)) + fprintf (stderr,"Unknown option character `\\x%x'.\n", optopt); + return FALSE; + default: + return FALSE; + } + + if (options.verbose) + { + fprintf(stdout,"Verbose: TRUE\nBasePath: %s\n",options.chanPath); + if (options.port) + { + fprintf(stdout,"Serial Port: %s\nSerial Speed: %d\n",options.port,options.speed); + } + } + return TRUE; +} +// ==================================================================================================== +int usbFeeder(void) + +{ + + unsigned char cbw[TRANSFER_SIZE]; + libusb_device_handle *handle; + libusb_device *dev; + int size; + + while (1) + { + if (libusb_init(NULL) < 0) + { + fprintf(stderr,"Failed to initalise USB interface\n"); + return (-1); + } + + while (!(handle = libusb_open_device_with_vid_pid(NULL, VID, PID))) + { + usleep(500000); + } + + if (!(dev = libusb_get_device(handle))) + continue; + + 
if (libusb_claim_interface (handle, INTERFACE)<0) + continue; + + while (0==libusb_bulk_transfer(handle, ENDPOINT, cbw, TRANSFER_SIZE, &size, 10)) + { + unsigned char *c=cbw; + if (options.dump) + printf(cbw); + else + while (size--) + _protocolPump(c++); + } + + libusb_close(handle); + } +} +// ==================================================================================================== +int serialFeeder(void) + +{ + int f; + unsigned char cbw[TRANSFER_SIZE]; + ssize_t t; + struct termios settings; + + while (1) + { + while ((f=open(options.port,O_RDONLY))<0) + { + if (options.verbose) + { + fprintf(stderr,"Can't open serial port\n"); + } + usleep(500000); + } + + if (options.verbose) + { + fprintf(stderr,"Port opened\n"); + } + + if (tcgetattr(f, &settings) <0) + { + perror("tcgetattr"); + return(-3); + } + + if (cfsetspeed(&settings, options.speed)<0) + { + perror("Setting input speed"); + return -3; + } + settings.c_lflag &= ~(ICANON | ECHO | ECHOE | ISIG); + settings.c_cflag &= ~PARENB; /* no parity */ + settings.c_cflag &= ~CSTOPB; /* 1 stop bit */ + settings.c_cflag &= ~CSIZE; + settings.c_cflag |= CS8 | CLOCAL; /* 8 bits */ + settings.c_oflag &= ~OPOST; /* raw output */ + + if (tcsetattr(f, TCSANOW, &settings)<0) + { + fprintf(stderr,"Unsupported baudrate\n"); + exit(-3); + } + + tcflush(f, TCOFLUSH); + + while ((t=read(f,cbw,TRANSFER_SIZE))>0) + { + unsigned char *c=cbw; + while (t--) + _protocolPump(c++); + } + if (options.verbose) + { + fprintf(stderr,"Read failed\n"); + } + close(f); + } +} +// ==================================================================================================== +int main(int argc, char *argv[]) + +{ + if (!_processOptions(argc,argv)) + { + exit(-1); + } + + atexit(_removeFifoTasks); + /* This ensures the atexit gets called */ + signal(SIGINT, intHandler); + if (!_makeFifoTasks()) + { + fprintf(stderr,"Failed to make channel devices\n"); + exit(-1); + } + + /* Using the exit construct rather than return ensures the 
atexit gets called */ + if (!options.port) + exit(usbFeeder()); + else + exit(serialFeeder()); + fprintf(stderr,"Returned\n"); + exit(0); +} +// ====================================================================================================