Can anyone tell me how to convert RTF formatted data into plain text?
I have a column in sql server 2005 table which stores the rtf data. I
want to convert and store this data as plain text in another table.
Answer :
CREATE FUNCTION dbo.fnParseRTF
(
@rtf VARCHAR(8000)
)
RETURNS VARCHAR(8000)
AS
BEGIN
DECLARE @Stage TABLE
(
Chr CHAR(1),
Pos INT
)
INSERT @Stage
(
Chr,
Pos
)
SELECT SUBSTRING(@rtf, Number, 1),
Number
FROM master..spt_values
WHERE Type = 'p'
AND SUBSTRING(@rtf, Number, 1) IN ('{', '}')
DECLARE @Pos1 INT,
@Pos2 INT
SELECT @Pos1 = MIN(Pos),
@Pos2 = MAX(Pos)
FROM @Stage
DELETE
FROM @Stage
WHERE Pos IN (@Pos1, @Pos2)
WHILE 1 = 1
BEGIN
SELECT TOP 1 @Pos1 = s1.Pos,
@Pos2 = s2.Pos
FROM @Stage AS s1
INNER JOIN @Stage AS s2 ON s2.Pos > s1.Pos
WHERE s1.Chr = '{'
AND s2.Chr = '}'
ORDER BY s2.Pos - s1.Pos
IF @@ROWCOUNT = 0
BREAK
DELETE
FROM @Stage
WHERE Pos IN (@Pos1, @Pos2)
UPDATE @Stage
SET Pos = Pos - @Pos2 + @Pos1 - 1
WHERE Pos > @Pos2
SET @rtf = STUFF(@rtf, @Pos1, @Pos2 - @Pos1 + 1, '')
END
SET @Pos1 = PATINDEX('%\cf[0123456789][0123456789 ]%', @rtf)
WHILE @Pos1 > 0
SELECT @Pos2 = CHARINDEX(' ', @rtf, @Pos1 + 1),
@rtf = STUFF(@rtf, @Pos1, @Pos2 - @Pos1 + 1, ''),
@Pos1 = PATINDEX('%\cf[0123456789][0123456789 ]%', @rtf)
SELECT @rtf = REPLACE(@rtf, '\pard', ''),
@rtf = REPLACE(@rtf, '\par', ''),
@rtf = LEFT(@rtf, LEN(@rtf) - 1)
SELECT @rtf = REPLACE(@rtf, '\b0 ', ''),
@rtf = REPLACE(@rtf, '\b ', '')
SELECT @rtf = STUFF(@rtf, 1, CHARINDEX(' ', @rtf), '')
RETURN @rtf
END
I have found this answer from www.bigresource.com.
Thanks you bigresource.
Thanks for that code. I had to make a few mods to get more codes out of the rtf. Revised code posted below.
ReplyDeleteCheers
David
Alter FUNCTION dbo.RtfToText
(
@rtf VARCHAR(8000)
)
RETURNS VARCHAR(8000)
AS
BEGIN
if @Rtf is NULL Return Null
if len(@Rtf)=0 Return Null
If left(@Rtf,1)<>'{' Return @Rtf
DECLARE @Stage TABLE(Chr CHAR(1),Pos INT)
INSERT @Stage(Chr,Pos)
SELECT SUBSTRING(@rtf, Number, 1),Number FROM master..spt_values
WHERE Type = 'p' AND SUBSTRING(@rtf, Number, 1) IN ('{', '}')
DECLARE @Pos1 INT
DECLARE @Pos2 INT
SELECT @Pos1 = MIN(Pos),@Pos2 = MAX(Pos) FROM @Stage
DELETE FROM @Stage WHERE Pos IN (@Pos1, @Pos2)
WHILE 1 = 1
BEGIN
SELECT TOP 1 @Pos1 = s1.Pos, @Pos2 = s2.Pos
FROM @Stage AS s1 INNER JOIN @Stage AS s2 ON s2.Pos > s1.Pos
WHERE s1.Chr = '{' AND s2.Chr = '}'
ORDER BY s2.Pos - s1.Pos
IF @@ROWCOUNT = 0
BREAK
DELETE FROM @Stage
WHERE Pos IN (@Pos1, @Pos2)
UPDATE @Stage SET Pos = Pos - @Pos2 + @Pos1 - 1
WHERE Pos > @Pos2
SET @rtf = STUFF(@rtf, @Pos1, @Pos2 - @Pos1 + 1, '')
END
SELECT @rtf = STUFF(@rtf, 1, CHARINDEX(' ', @rtf), '')
Declare @CurrPosn Int
Declare @FinishPosn Int
Declare @BlankOut Bit
Declare @StrToRepl varchar(20)
Declare @CurrChar Char(1)
Set @CurrPosn=PatIndex('%\%',@Rtf)
While @CurrPosn>0 Begin
Set @StrToRepl=Substring(@Rtf,@CurrPosn,1)
Set @FinishPosn=@CurrPosn+1
While @FinishPosn0 Begin
Set @CurrChar=Substring(@Rtf,@FinishPosn,1)
if @CurrChar=' ' Begin
Set @StrToRepl=@StrToRepl + ' '
Set @FinishPosn=0
End
Else Begin
Set @FinishPosn=@FinishPosn+1
Set @StrToRepl=@StrToRepl + @CurrChar
End
End
Set @Rtf=Replace(@Rtf,@StrToRepl,'')
Set @CurrPosn=PatIndex('%\%',@Rtf)
End
RETURN @rtf
END
Hi. I'm trying to do just this, but have a problem with your script. I think it's just a typing error in your code, but as I'm not very good at this stuff, I need your guidance. When I run the function as entered, I get the error "must declare variable @FinishPosn0".
ReplyDeleteI'm guessing the line
While @FinishPosition0 Begin
should have a comparison of some sort. But is it < 0, > 0 or =0?
Thanks.
Thanks for this nice function ;) And thanks for sharing!
ReplyDeleteGuillaume.
ALTER FUNCTION dbo.RtfToText
ReplyDelete(
@rtf VARCHAR(8000)
)
RETURNS VARCHAR(8000)
AS
BEGIN
if @Rtf is NULL Return Null
if len(@Rtf)=0 Return Null
If left(@Rtf,1)<>'{' Return @Rtf
DECLARE @Stage TABLE(Chr CHAR(1),Pos INT)
INSERT @Stage(Chr,Pos)
SELECT SUBSTRING(@rtf, Number, 1),Number FROM master..spt_values
WHERE Type = 'p' AND SUBSTRING(@rtf, Number, 1) IN ('{', '}')
DECLARE @Pos1 INT
DECLARE @Pos2 INT
SELECT @Pos1 = MIN(Pos),@Pos2 = MAX(Pos) FROM @Stage
DELETE FROM @Stage WHERE Pos IN (@Pos1, @Pos2)
WHILE 1 = 1
BEGIN
SELECT TOP 1 @Pos1 = s1.Pos, @Pos2 = s2.Pos
FROM @Stage AS s1 INNER JOIN @Stage AS s2 ON s2.Pos > s1.Pos
WHERE s1.Chr = '{' AND s2.Chr = '}'
ORDER BY s2.Pos - s1.Pos
IF @@ROWCOUNT = 0
BREAK
DELETE FROM @Stage
WHERE Pos IN (@Pos1, @Pos2)
UPDATE @Stage SET Pos = Pos - @Pos2 + @Pos1 - 1
WHERE Pos > @Pos2
SET @rtf = STUFF(@rtf, @Pos1, @Pos2 - @Pos1 + 1, '')
END
SELECT @rtf = STUFF(@rtf, 1, CHARINDEX(' ', @rtf), '')
Declare @CurrPosn Int
Declare @FinishPosn Int
Declare @BlankOut Bit
Declare @StrToRepl varchar(20)
Declare @CurrChar Char(1)
Set @CurrPosn=PatIndex('%\%',@Rtf)
While @CurrPosn>0 Begin
Set @StrToRepl=Substring(@Rtf,@CurrPosn,1)
Set @FinishPosn=@CurrPosn+1
While @FinishPosn>0 Begin
Set @CurrChar=Substring(@Rtf,@FinishPosn,1)
if @CurrChar=' ' Begin
Set @StrToRepl=@StrToRepl + ' '
Set @FinishPosn=0
End
Else Begin
Set @FinishPosn=@FinishPosn+1
Set @StrToRepl=@StrToRepl + @CurrChar
End
End
Set @Rtf=Replace(@Rtf,@StrToRepl,'')
Set @CurrPosn=PatIndex('%\%',@Rtf)
End
RETURN @rtf
END
error free function for rtf to txt in sql server 2008 r2
ReplyDeleteAppreciate if somebody can explain how to specify the database name, table name, and column name that contains rtf and needs to be converted in text in the code.
ReplyDeleteThanks a lot.
You could also try this:-
ReplyDeletehttp://www.codeproject.com/Tips/821281/Convert-RTF-to-Plain-Text-Revised-Again
If you want to know more details than you can visit-
ReplyDeletehttp://www.prohut.net/pmp/
Great and that i have a neat offer: How Long Does House Renovation Take cost to gut a house
ReplyDelete