Wednesday, December 1, 2010

SQL SERVER - Convert Rtf To Plain Text In Sql

Can anyone tell me how to convert RTF formatted data into plain text?

I have a column in sql server 2005 table which stores the rtf data. I
want to convert and store this data as plain text in another table.

Answer :

CREATE FUNCTION dbo.fnParseRTF
(
@rtf VARCHAR(8000)
)
RETURNS VARCHAR(8000)
AS
BEGIN
DECLARE @Stage TABLE
(
Chr CHAR(1),
Pos INT
)

INSERT @Stage
(
Chr,
Pos
)
SELECT SUBSTRING(@rtf, Number, 1),
Number
FROM master..spt_values
WHERE Type = 'p'
AND SUBSTRING(@rtf, Number, 1) IN ('{', '}')

DECLARE @Pos1 INT,
@Pos2 INT

SELECT @Pos1 = MIN(Pos),
@Pos2 = MAX(Pos)
FROM @Stage

DELETE
FROM @Stage
WHERE Pos IN (@Pos1, @Pos2)

WHILE 1 = 1
BEGIN
SELECT TOP 1 @Pos1 = s1.Pos,
@Pos2 = s2.Pos
FROM @Stage AS s1
INNER JOIN @Stage AS s2 ON s2.Pos > s1.Pos
WHERE s1.Chr = '{'
AND s2.Chr = '}'
ORDER BY s2.Pos - s1.Pos

IF @@ROWCOUNT = 0
BREAK

DELETE
FROM @Stage
WHERE Pos IN (@Pos1, @Pos2)

UPDATE @Stage
SET Pos = Pos - @Pos2 + @Pos1 - 1
WHERE Pos > @Pos2

SET @rtf = STUFF(@rtf, @Pos1, @Pos2 - @Pos1 + 1, '')
END

SET @Pos1 = PATINDEX('%\cf[0123456789][0123456789 ]%', @rtf)

WHILE @Pos1 > 0
SELECT @Pos2 = CHARINDEX(' ', @rtf, @Pos1 + 1),
@rtf = STUFF(@rtf, @Pos1, @Pos2 - @Pos1 + 1, ''),
@Pos1 = PATINDEX('%\cf[0123456789][0123456789 ]%', @rtf)

SELECT @rtf = REPLACE(@rtf, '\pard', ''),
@rtf = REPLACE(@rtf, '\par', ''),
@rtf = LEFT(@rtf, LEN(@rtf) - 1)

SELECT @rtf = REPLACE(@rtf, '\b0 ', ''),
@rtf = REPLACE(@rtf, '\b ', '')

SELECT @rtf = STUFF(@rtf, 1, CHARINDEX(' ', @rtf), '')

RETURN @rtf
END

I have found this answer from www.bigresource.com.
Thanks you bigresource.

9 comments:

  1. Thanks for that code. I had to make a few mods to get more codes out of the rtf. Revised code posted below.

    Cheers
    David


    Alter FUNCTION dbo.RtfToText
    (
    @rtf VARCHAR(8000)
    )
    RETURNS VARCHAR(8000)
    AS

    BEGIN

    if @Rtf is NULL Return Null
    if len(@Rtf)=0 Return Null
    If left(@Rtf,1)<>'{' Return @Rtf

    DECLARE @Stage TABLE(Chr CHAR(1),Pos INT)

    INSERT @Stage(Chr,Pos)
    SELECT SUBSTRING(@rtf, Number, 1),Number FROM master..spt_values
    WHERE Type = 'p' AND SUBSTRING(@rtf, Number, 1) IN ('{', '}')

    DECLARE @Pos1 INT
    DECLARE @Pos2 INT

    SELECT @Pos1 = MIN(Pos),@Pos2 = MAX(Pos) FROM @Stage
    DELETE FROM @Stage WHERE Pos IN (@Pos1, @Pos2)

    WHILE 1 = 1
    BEGIN
    SELECT TOP 1 @Pos1 = s1.Pos, @Pos2 = s2.Pos
    FROM @Stage AS s1 INNER JOIN @Stage AS s2 ON s2.Pos > s1.Pos
    WHERE s1.Chr = '{' AND s2.Chr = '}'
    ORDER BY s2.Pos - s1.Pos

    IF @@ROWCOUNT = 0
    BREAK

    DELETE FROM @Stage
    WHERE Pos IN (@Pos1, @Pos2)

    UPDATE @Stage SET Pos = Pos - @Pos2 + @Pos1 - 1
    WHERE Pos > @Pos2

    SET @rtf = STUFF(@rtf, @Pos1, @Pos2 - @Pos1 + 1, '')
    END

    SELECT @rtf = STUFF(@rtf, 1, CHARINDEX(' ', @rtf), '')

    Declare @CurrPosn Int
    Declare @FinishPosn Int
    Declare @BlankOut Bit
    Declare @StrToRepl varchar(20)
    Declare @CurrChar Char(1)
    Set @CurrPosn=PatIndex('%\%',@Rtf)
    While @CurrPosn>0 Begin
    Set @StrToRepl=Substring(@Rtf,@CurrPosn,1)
    Set @FinishPosn=@CurrPosn+1
    While @FinishPosn0 Begin
    Set @CurrChar=Substring(@Rtf,@FinishPosn,1)
    if @CurrChar=' ' Begin
    Set @StrToRepl=@StrToRepl + ' '
    Set @FinishPosn=0
    End
    Else Begin
    Set @FinishPosn=@FinishPosn+1
    Set @StrToRepl=@StrToRepl + @CurrChar
    End
    End
    Set @Rtf=Replace(@Rtf,@StrToRepl,'')
    Set @CurrPosn=PatIndex('%\%',@Rtf)
    End

    RETURN @rtf
    END

    ReplyDelete
  2. Hi. I'm trying to do just this, but have a problem with your script. I think it's just a typing error in your code, but as I'm not very good at this stuff, I need your guidance. When I run the function as entered, I get the error "must declare variable @FinishPosn0".

    I'm guessing the line
    While @FinishPosition0 Begin
    should have a comparison of some sort. But is it < 0, > 0 or =0?
    Thanks.

    ReplyDelete
  3. Thanks for this nice function ;) And thanks for sharing!
    Guillaume.

    ReplyDelete
  4. ALTER FUNCTION dbo.RtfToText
    (
    @rtf VARCHAR(8000)
    )
    RETURNS VARCHAR(8000)
    AS

    BEGIN

    if @Rtf is NULL Return Null
    if len(@Rtf)=0 Return Null
    If left(@Rtf,1)<>'{' Return @Rtf

    DECLARE @Stage TABLE(Chr CHAR(1),Pos INT)

    INSERT @Stage(Chr,Pos)
    SELECT SUBSTRING(@rtf, Number, 1),Number FROM master..spt_values
    WHERE Type = 'p' AND SUBSTRING(@rtf, Number, 1) IN ('{', '}')

    DECLARE @Pos1 INT
    DECLARE @Pos2 INT

    SELECT @Pos1 = MIN(Pos),@Pos2 = MAX(Pos) FROM @Stage
    DELETE FROM @Stage WHERE Pos IN (@Pos1, @Pos2)

    WHILE 1 = 1
    BEGIN
    SELECT TOP 1 @Pos1 = s1.Pos, @Pos2 = s2.Pos
    FROM @Stage AS s1 INNER JOIN @Stage AS s2 ON s2.Pos > s1.Pos
    WHERE s1.Chr = '{' AND s2.Chr = '}'
    ORDER BY s2.Pos - s1.Pos

    IF @@ROWCOUNT = 0
    BREAK

    DELETE FROM @Stage
    WHERE Pos IN (@Pos1, @Pos2)

    UPDATE @Stage SET Pos = Pos - @Pos2 + @Pos1 - 1
    WHERE Pos > @Pos2

    SET @rtf = STUFF(@rtf, @Pos1, @Pos2 - @Pos1 + 1, '')
    END

    SELECT @rtf = STUFF(@rtf, 1, CHARINDEX(' ', @rtf), '')

    Declare @CurrPosn Int
    Declare @FinishPosn Int
    Declare @BlankOut Bit
    Declare @StrToRepl varchar(20)
    Declare @CurrChar Char(1)
    Set @CurrPosn=PatIndex('%\%',@Rtf)
    While @CurrPosn>0 Begin
    Set @StrToRepl=Substring(@Rtf,@CurrPosn,1)
    Set @FinishPosn=@CurrPosn+1
    While @FinishPosn>0 Begin
    Set @CurrChar=Substring(@Rtf,@FinishPosn,1)
    if @CurrChar=' ' Begin
    Set @StrToRepl=@StrToRepl + ' '
    Set @FinishPosn=0
    End
    Else Begin
    Set @FinishPosn=@FinishPosn+1
    Set @StrToRepl=@StrToRepl + @CurrChar
    End
    End
    Set @Rtf=Replace(@Rtf,@StrToRepl,'')
    Set @CurrPosn=PatIndex('%\%',@Rtf)
    End

    RETURN @rtf
    END

    ReplyDelete
  5. error free function for rtf to txt in sql server 2008 r2

    ReplyDelete
  6. Appreciate if somebody can explain how to specify the database name, table name, and column name that contains rtf and needs to be converted in text in the code.

    Thanks a lot.

    ReplyDelete
  7. You could also try this:-

    http://www.codeproject.com/Tips/821281/Convert-RTF-to-Plain-Text-Revised-Again

    ReplyDelete
  8. If you want to know more details than you can visit-

    http://www.prohut.net/pmp/

    ReplyDelete
  9. Great and that i have a neat offer: How Long Does House Renovation Take cost to gut a house

    ReplyDelete